Quick overview

Current status

#library(nCov2019)
library(leaflet)
library(dplyr)
library(ggplot2)
library(plotly)
library(scales)
library(xts)
library(dygraphs)
library(corrplot)
library(lubridate)
library(fmsb)
library(forecast)
COVID<-read.csv("covid_19_data.csv")
COVID_2<-read.csv("COVID19_13-Apr.csv")

Format date:

Date<-as.Date(COVID_2$Date, format="%m/%d/%y") 

COVID_2$Date2<-Date
COVID_updated<-COVID_2 %>% filter(Date2==max(Date2))
leaflet(width = "100%") %>% 
  addProviderTiles("CartoDB.DarkMatter") %>% 
  setView(lng = 0, lat = 10, zoom = 1.5) %>% 
  addCircleMarkers(data = COVID_updated, 
                   lng = ~ Long,
                   lat = ~ Lat,
                   radius = ~ log(Confirmed+1),
                   color = rgb(218/255,65/255,56/255),
                   fillOpacity = ~ ifelse(Confirmed > 0, 1, 0),
                   stroke = FALSE,
                   label = ~ paste(Province.State,",",Country.Region, ": ", Confirmed)
                   )

Current top 10 countries:

COVID_top<-COVID_2 %>% filter(Date2==max(Date2)) %>% 
  group_by(Country.Region) %>% summarise(Total_confirmed=sum(Confirmed)) %>% 
  top_n(10,Total_confirmed) %>% arrange(desc(Total_confirmed))
plot<-ggplot(data=COVID_top
       , aes(x=Total_confirmed,y=reorder(Country.Region,Total_confirmed))) +
  geom_bar(stat ="identity",alpha=0.8,fill="firebrick3") +
  geom_text(aes(label=Total_confirmed), vjust=0.5, hjust=0.9,color="black", size=3.5) +
  scale_x_continuous(labels = comma) +
  labs(title = paste("Top 10 countries with confirmed cases as of ",max(COVID_2$Date2)),
       x = "Confirmed cases",
       y = "Country") +
  theme_minimal()

ggplotly(plot,tooltip = c("x"),width=750)

Time distribution:

COVID_2_Day<- COVID_2 %>% group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed),
                                                        World_deaths=sum(Deaths),
                                                        World_recovered=sum(Recovered))


COVID_Day_confirmed_series<-xts(COVID_2_Day$World_confirmed, order.by=COVID_2_Day$Date2)
COVID_Day_deaths_series<-xts(COVID_2_Day$World_deaths, order.by=COVID_2_Day$Date2)
COVID_Day_recovered_series<-xts(COVID_2_Day$World_recovered, order.by=COVID_2_Day$Date2)

Day_summary<-cbind(COVID_Day_confirmed_series,COVID_Day_deaths_series,COVID_Day_recovered_series)
dygraph(Day_summary, main = "SARS-COV2-outbreak: Total worldwide cases", 
        xlab="Date", ylab="Total cases",width = 750) %>% 
  dySeries("COVID_Day_confirmed_series", "Total cases",drawPoints = TRUE, 
           pointSize = 3, color=rgb(53/255,116/255,199/255)) %>% 
  dySeries("COVID_Day_deaths_series", "Total deaths",drawPoints = TRUE, 
           pointSize = 3, color=rgb(189/255,55/255,48/255)) %>% 
  dySeries("COVID_Day_recovered_series", "Total recovered",drawPoints = TRUE, 
           pointSize = 3, color=rgb(69/255,136/255,51/255)) %>% 
  dyRangeSelector()
New_count<-function(x)
{
  Daily_cases<-numeric(length(x))
  
  for(i in length(x):2)
  {
    Daily_cases[i]=x[i] - x[i-1]
  }
  return(Daily_cases)
}

New_cases<-New_count(COVID_2_Day$World_confirmed)
New_deaths<-New_count(COVID_2_Day$World_deaths)
New_recovered<-New_count(COVID_2_Day$World_recovered)
COVID_New_confirmed_series<-xts(New_cases, order.by=COVID_2_Day$Date2)
COVID_New_deaths_series<-xts(New_deaths, order.by=COVID_2_Day$Date2)
COVID_New_recovered_series<-xts(New_recovered, order.by=COVID_2_Day$Date2)

New_summary<-cbind(COVID_New_confirmed_series,COVID_New_deaths_series,COVID_New_recovered_series)
dygraph(New_summary, main = "SARS-COV2-outbreak: Total worldwide cases", 
        xlab="Date", ylab="Novel coronavirus cases",width = 750) %>% 
  dySeries("COVID_New_confirmed_series", "New cases",drawPoints = TRUE, 
           pointSize = 3, color=rgb(53/255,116/255,199/255)) %>% 
  dySeries("COVID_New_deaths_series", "New deaths",drawPoints = TRUE, 
           pointSize = 3, color=rgb(189/255,55/255,48/255)) %>% 
  dySeries("COVID_New_recovered_series", "New recovered",drawPoints = TRUE, 
           pointSize = 3, color=rgb(69/255,136/255,51/255)) %>% 
  dyRangeSelector()

Team members countries total cases:

COVID_2_Day_Lebanon<- COVID_2 %>% 
  filter(Country.Region %in% c("Lebanon")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Chile<- COVID_2 %>% 
  filter(Country.Region %in% c("Chile")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Colombia<- COVID_2 %>% 
  filter(Country.Region %in% c("Colombia")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_CostaRica<- COVID_2 %>% 
  filter(Country.Region %in% c("Costa Rica")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))


COVID_Day_series_Lebanon<-xts(COVID_2_Day_Lebanon$World_confirmed, order.by=COVID_2_Day_Lebanon$Date2)
COVID_Day_series_Chile<-xts(COVID_2_Day_Chile$World_confirmed, order.by=COVID_2_Day_Chile$Date2)
COVID_Day_series_Colombia<-xts(COVID_2_Day_Colombia$World_confirmed, order.by=COVID_2_Day_Colombia$Date2)
COVID_Day_series_CostaRica<-xts(COVID_2_Day_CostaRica$World_confirmed, order.by=COVID_2_Day_CostaRica$Date2)

Our_Countries<-cbind(COVID_Day_series_Lebanon,COVID_Day_series_Chile,COVID_Day_series_Colombia,COVID_Day_series_CostaRica)
dygraph(Our_Countries, main = "SARS-COV2-outbreak: Total cases by country", xlab="Date", ylab="Total cases",width = 750) %>% 
  dySeries("COVID_Day_series_Lebanon", "Lebanon",drawPoints = TRUE, 
           pointSize = 3, color=rgb(0,0,3/255)) %>% 
  dySeries("COVID_Day_series_Chile", "Chile",drawPoints = TRUE, 
           pointSize = 3,color=rgb(120/255,28/255,109/255)) %>% 
  dySeries("COVID_Day_series_Colombia", "Colombia",drawPoints = TRUE, 
           pointSize = 3,color=rgb(237/255,105/255,37/255)) %>% 
  dySeries("COVID_Day_series_CostaRica", "Costa Rica",drawPoints = TRUE,
           pointSize = 3,color=rgb(204/255,197/255,126/255)) %>% 
  dyRangeSelector()
New_Lebanon<-New_count(COVID_2_Day_Lebanon$World_confirmed)
New_Chile<-New_count(COVID_2_Day_Chile$World_confirmed)
New_Colombia<-New_count(COVID_2_Day_Colombia$World_confirmed)
New_CostaRica<-New_count(COVID_2_Day_CostaRica$World_confirmed)

COVID_New_series_Lebanon<-xts(New_Lebanon, order.by=COVID_2_Day_Lebanon$Date2)
COVID_New_series_Chile<-xts(New_Chile, order.by=COVID_2_Day_Chile$Date2)
COVID_New_series_Colombia<-xts(New_Colombia, order.by=COVID_2_Day_Colombia$Date2)
COVID_New_series_CostaRica<-xts(New_CostaRica, order.by=COVID_2_Day_CostaRica$Date2)

Our_New_Countries<-cbind(COVID_New_series_Lebanon,COVID_New_series_Chile,COVID_New_series_Colombia,COVID_New_series_CostaRica)
dygraph(Our_New_Countries, main = "SARS-COV2-outbreak: New cases by country", xlab="Date", ylab="Total cases",width = 750) %>% 
  dySeries("COVID_New_series_Lebanon", "Lebanon",drawPoints = TRUE, 
           pointSize = 3, color=rgb(0,0,3/255)) %>% 
  dySeries("COVID_New_series_Chile", "Chile",drawPoints = TRUE, 
           pointSize = 3,color=rgb(120/255,28/255,109/255)) %>% 
  dySeries("COVID_New_series_Colombia", "Colombia",drawPoints = TRUE, 
           pointSize = 3,color=rgb(237/255,105/255,37/255)) %>% 
  dySeries("COVID_New_series_CostaRica", "Costa Rica",drawPoints = TRUE,
           pointSize = 3,color=rgb(204/255,197/255,126/255)) %>% 
  dyRangeSelector()

Looking for correlations

fig <- plot_ly(COVID_updated, x = ~Confirmed, y = ~Deaths, z = ~Recovered, width=750) %>% 
  add_markers(text= ~Country.Region ,hoverinfo= "text",
              marker = list(color=rgb(189/255,55/255,48/255))) %>% 
  layout(title="Confirmed cases Vs. Deaths Vs. Recovered", scene = list(
                    xaxis = list(title = 'Confirmed'),
                     yaxis = list(title = 'Deaths'),
                     zaxis = list(title = 'Recovered'))) 
fig

For the number of cases

Human Development Index

HDI<-read.csv("Human Development Index (HDI)_2.csv",sep=";",dec=",")
COVID_Country<-COVID_2 %>% filter(Date2==max(Date2)) %>% 
  group_by(Country.Region) %>% summarise(Total_confirmed=sum(Confirmed),
                                         Total_deaths=sum(Deaths),
                                         Total_Recovered=sum(Recovered))

Remove after parentheses:

HDI$Country_2<-gsub("\\s*\\([^\\)]+\\)","",as.character(HDI$Country))
HDI$Country_2[HDI$Country_2=="United States"]<-"US"
HDI$Country_2[HDI$Country_2=="Korea"]<-"South Korea"

Population:

Population<-read.csv("World_population.csv",sep=";",dec=",")

Remove after commma:

Population$Country_Name_2<-gsub(",.*", "", as.character(Population$Country_Name))
Population$Country_Name_2[Population$Country_Name_2=="United States"]<-"US"
Population$Country_Name_2[Population$Country_Code=="KOR"]<-"South Korea"
Population$Country_Name_2[Population$Country_Code=="CZE"]<-"Czechia"

Natural Join:

COVID_3<- COVID_Country %>% inner_join(HDI,by=c("Country.Region"="Country_2")) %>% 
  inner_join(Population,by=c("Country.Region"="Country_Name_2")) %>% 
  select(Country.Region,Total_confirmed,Total_deaths,Total_Recovered,HDI_Rank_2018,Year_2018,
         Country_Code,Population_2018) %>% 
  mutate(Cases_million=(Total_confirmed/Population_2018)*1000000,
         Recovered_percentage=(Total_Recovered/Total_confirmed)*100)  

COVID_3<-COVID_3[!is.na(COVID_3$Population_2018),]

Countries with top 10 cases per million inhabitants

COVID3_top<-COVID_3 %>% top_n(10,Cases_million) %>% arrange(desc(Cases_million)) %>% 
  mutate(Cases_million=round(Cases_million,0))
plot<-ggplot(data=COVID3_top
       , aes(x=Cases_million,y=reorder(Country.Region,Cases_million))) +
  geom_bar(stat ="identity",alpha=0.8,fill="sky blue", ) +
  geom_text(aes(label=Cases_million), vjust=0.5, hjust=0.9,color="black", size=3.5) +
  scale_x_continuous(labels = comma) +
  labs(title = paste("Top 10 countries with confirmed cases per million habitant \n as of ",max(COVID_2$Date2)),
       x = "Confirmed cases per million habitants",
       y = "Country") +
  theme_minimal()

ggplotly(plot,tooltip = c("x"),width=750)

Plot the Human Development Index(HDI) Vs. the number of cases (applying a log transformation), and the proportion of recovered cases:

plot<-ggplot(data=COVID_3,aes(x=log(Cases_million),y=Year_2018,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(237/255,105/255,37/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="HDI Vs. logarithmus of COVID-19 cases by million inhabitants \n and proportion of recovered",
       x="ln(Cases/1M population)",
       y="HDI")

ggplotly(plot,tooltip = c("text"),width=750)
COVID_numeric_1<-COVID_3 %>% mutate(Log_cases=log(Cases_million),
                                    Death_percentage=(Total_deaths/Total_confirmed)*100) %>% 
  select(Log_cases,Recovered_percentage,Death_percentage,Year_2018)

corrplot(cor(COVID_numeric_1),method = "number",tl.col="black",tl.srt=15,
         col=colorRampPalette(c(rgb(204/255,197/255,126/255),rgb(237/255,105/255,37/255)))(200))

Measles<-read.csv("Measles_immunization.csv",sep=";",dec=".")
Measles$Country_2<-gsub("\\s*\\([^\\)]+\\)","",as.character(Measles$Country))
Measles$Country_2[Measles$Country_2=="United States"]<-"US"
Measles$Country_2[Measles$Country_2=="Korea"]<-"South Korea"
COVID_3<- COVID_3 %>% inner_join(Measles,by=c("Country.Region"="Country_2")) %>% select(-c("Country"))

Health expenditure (% of GDP)

Health_expenditure<-read.csv("Health_expenditure_GDP.csv",sep=";",dec=".")
Health_expenditure$Country_2<-gsub("\\s*\\([^\\)]+\\)","",
                                   as.character(Health_expenditure$Country))
Health_expenditure$Country_2[Health_expenditure$Country_2=="United States"]<-"US"
Health_expenditure$Country_2[Health_expenditure$Country_2=="Korea"]<-"South Korea"
COVID_3<- COVID_3 %>% inner_join(Health_expenditure,by=c("Country.Region"="Country_2")) %>% select(-c("Country"))
plot<-ggplot(data=COVID_3,aes(x=log(Cases_million),y=Expenditure_2016,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(204/255,197/255,126/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="Health expenditure (% of GDP) Vs. logarithmus of COVID-19 cases by million inhabitants \n and proportion of recovered",
       x="ln(Cases/1M population)",
       y="% of GDP in health")

ggplotly(plot,tooltip = c("text"),width=750)

Temperature

Temperature<-read.csv("GlobalLandTemperaturesByCountry.csv",sep=";")
Date_Temp<-as.Date(Temperature$dt, format="%d/%m/%Y") 

Temperature$Date_Temp<-Date_Temp

Extract month, filter IQ (Jan, Feb and Mar) and obtain the average IQ temperature:

Temperature<- Temperature %>% mutate(Month=month(Temperature$Date_Temp,label =TRUE),
                                     Year=year(Temperature$Date_Temp)) %>% 
  filter(Month %in% c("Jan","Feb","Mar") & Year>=2000)  %>% 
  group_by(Country) %>% summarise(Avg_IQ_temperature=mean(AverageTemperature))

Temperature<-Temperature[!is.na(Temperature$Avg_IQ_temperature),]
Temperature$Country_2<-gsub("\\s*\\([^\\)]+\\)","",
                                   as.character(Temperature$Country))
Temperature$Country_2[Temperature$Country_2=="United States"]<-"US"
Temperature$Country_2[Temperature$Country_2=="Korea"]<-"South Korea"
COVID_3<- COVID_3 %>% inner_join(Temperature,by=c("Country.Region"="Country_2")) %>% select(-c("Country"))

#write.csv(COVID_3,"COVID_Covariables.csv")
plot<-ggplot(data=COVID_3,aes(x=log(Cases_million),y=Avg_IQ_temperature,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(120/255,28/255,109/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="Average IQ temperature Vs. logarithmus of COVID-19 cases by million inhabitants \n and proportion of recovered",
       x="ln(Cases/1M population)",
       y="Temperature (Celcius)")

ggplotly(plot,tooltip = c("text"),width=750)

For the number of deaths

###DTP immunization

DTP_immunization<-read.csv("Infants_lacking_immunization_DTP.csv",sep=";")

Remove after parentheses:

DTP_immunization$Country_2<-gsub("\\s*\\([^\\)]+\\)","",
                                 as.character(DTP_immunization$Country))
DTP_immunization$Country_2[DTP_immunization$Country_2=="United States"]<-"US"
DTP_immunization$Country_2[DTP_immunization$Country_2=="Korea"]<-"South Korea"
COVID_4<- COVID_Country %>% inner_join(DTP_immunization,
                                       by=c("Country.Region"="Country_2")) %>% 
  select(Country.Region,Total_confirmed,Total_deaths,Total_Recovered,
         Lack_DTP_inmmunization_2018) %>% 
  mutate(Recovered_percentage=(Total_Recovered/Total_confirmed)*100,
         Death_rate=(Total_deaths/Total_confirmed)*100)
plot<-ggplot(data=COVID_4,aes(x=Death_rate,y=Lack_DTP_inmmunization_2018,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(120/255,28/255,109/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="% infants lacking DTP immunization Vs. death rate and \n proportion of recovered",
       x="Novel coronavirus death rate",
       y="% of infants")

ggplotly(plot,tooltip = c("text"),width=750)

Infants lacking immunization, measles (% of one-year-olds)

COVID_4<- COVID_4 %>% inner_join(Measles,by=c("Country.Region"="Country_2")) %>% select(-c("Country"))
ggplot(COVID_4, aes(y=Measles_2018)) + 
  geom_boxplot(fill="dodgerblue4",outlier.shape = 21, 
               outlier.fill = "firebrick",alpha=0.75) +
  ggtitle("Boxplot of % infants lacking measles immunization") + ylab("% of infants") +
  theme_minimal()

ggplot(COVID_4, aes(Measles_2018)) + 
  geom_histogram(fill="dodgerblue4",bins=20,alpha=0.8) +
  ggtitle("Histogram of % infants lacking measles immunization") + 
  xlab("% of infants") + 
  ylab("Count") +
  theme_minimal()

plot<-ggplot(data=COVID_4,aes(x=Death_rate,y=Measles_2018,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(237/255,105/255,37/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="% infants lacking measles immunization Vs. fatality rate \n and proportion of recovered",
       x="Fatality rate (%)",
       y="% of infants")

ggplotly(plot,tooltip = c("text"),width=750)

Fitting a regression model

Transforming with ln and converting temperature as kelvin:

Mod1<-lm(log(COVID_3$Cases_million)~log(COVID_3$Year_2018)+log(COVID_3$Measles_2018)+
           log(COVID_3$Expenditure_2016)+log(COVID_3$Avg_IQ_temperature+273.15))
summary(Mod1)

Call:
lm(formula = log(COVID_3$Cases_million) ~ log(COVID_3$Year_2018) + 
    log(COVID_3$Measles_2018) + log(COVID_3$Expenditure_2016) + 
    log(COVID_3$Avg_IQ_temperature + 273.15))

Residuals:
    Min      1Q  Median      3Q     Max 
-4.0445 -0.8320 -0.1258  0.7860  5.0653 

Coefficients:
                                         Estimate Std. Error t value Pr(>|t|)    
(Intercept)                              32.48921   20.05878   1.620  0.10755    
log(COVID_3$Year_2018)                    7.70457    0.67547  11.406  < 2e-16 ***
log(COVID_3$Measles_2018)                 0.03906    0.12115   0.322  0.74762    
log(COVID_3$Expenditure_2016)             0.94790    0.31777   2.983  0.00337 ** 
log(COVID_3$Avg_IQ_temperature + 273.15) -4.85499    3.53196  -1.375  0.17146    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.389 on 140 degrees of freedom
Multiple R-squared:  0.6975,    Adjusted R-squared:  0.6889 
F-statistic:  80.7 on 4 and 140 DF,  p-value: < 2.2e-16

Stepwise with AIC critertion:

Mod2<-step(Mod1,direction = "both")
Start:  AIC=100.11
log(COVID_3$Cases_million) ~ log(COVID_3$Year_2018) + log(COVID_3$Measles_2018) + 
    log(COVID_3$Expenditure_2016) + log(COVID_3$Avg_IQ_temperature + 
    273.15)

                                           Df Sum of Sq    RSS     AIC
- log(COVID_3$Measles_2018)                 1     0.200 270.14  98.222
- log(COVID_3$Avg_IQ_temperature + 273.15)  1     3.643 273.59 100.059
<none>                                                  269.94 100.115
- log(COVID_3$Expenditure_2016)             1    17.157 287.10 107.049
- log(COVID_3$Year_2018)                    1   250.858 520.80 193.402

Step:  AIC=98.22
log(COVID_3$Cases_million) ~ log(COVID_3$Year_2018) + log(COVID_3$Expenditure_2016) + 
    log(COVID_3$Avg_IQ_temperature + 273.15)

                                           Df Sum of Sq    RSS     AIC
- log(COVID_3$Avg_IQ_temperature + 273.15)  1     3.567 273.71  98.124
<none>                                                  270.14  98.222
+ log(COVID_3$Measles_2018)                 1     0.200 269.94 100.115
- log(COVID_3$Expenditure_2016)             1    17.513 287.66 105.330
- log(COVID_3$Year_2018)                    1   306.428 576.57 206.153

Step:  AIC=98.12
log(COVID_3$Cases_million) ~ log(COVID_3$Year_2018) + log(COVID_3$Expenditure_2016)

                                           Df Sum of Sq    RSS     AIC
<none>                                                  273.71  98.124
+ log(COVID_3$Avg_IQ_temperature + 273.15)  1      3.57 270.14  98.222
+ log(COVID_3$Measles_2018)                 1      0.12 273.59 100.059
- log(COVID_3$Expenditure_2016)             1     23.11 296.82 107.879
- log(COVID_3$Year_2018)                    1    445.08 718.79 236.122
summary(Mod2)

Call:
lm(formula = log(COVID_3$Cases_million) ~ log(COVID_3$Year_2018) + 
    log(COVID_3$Expenditure_2016))

Residuals:
    Min      1Q  Median      3Q     Max 
-3.9696 -0.7793  0.0030  0.8599  5.0838 

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                     4.9541     0.6584   7.525 5.53e-12 ***
log(COVID_3$Year_2018)          8.0064     0.5269  15.196  < 2e-16 ***
log(COVID_3$Expenditure_2016)   1.0627     0.3069   3.463 0.000707 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.388 on 142 degrees of freedom
Multiple R-squared:  0.6933,    Adjusted R-squared:  0.689 
F-statistic: 160.5 on 2 and 142 DF,  p-value: < 2.2e-16

Normality of residuals:

hist(Mod2$residuals)

shapiro.test(Mod2$residuals)

    Shapiro-Wilk normality test

data:  Mod2$residuals
W = 0.9852, p-value = 0.1221

Prediction power: separate between training and testing:

set.seed(179819)
Sample <- sample(1:length(COVID_3$Cases_million),length(COVID_3$Cases_million)*0.20)
t.testing<- COVID_3[Sample,]
t.training<- COVID_3[-Sample,]

Transform the training and testing variables as before:

t.training<-t.training %>% mutate(Cases_million_log=log(Cases_million),HDI_log=log(Year_2018),
                      GDP_log=log(Expenditure_2016),
                      Temperature_log_kelvin=log(Avg_IQ_temperature+273.15)) 

t.training<-t.training[,14:17]

t.testing<-t.testing %>% mutate(Cases_million_log=log(Cases_million),HDI_log=log(Year_2018),
                      GDP_log=log(Expenditure_2016),
                      Temperature_log_kelvin=log(Avg_IQ_temperature+273.15)) 

t.testing<-t.testing[,14:17]

Fit the same model with training

Mod3<-lm(Cases_million_log~., data=t.training)
summary(Mod3)

Call:
lm(formula = Cases_million_log ~ ., data = t.training)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.0072 -0.8207  0.0104  0.8562  5.0870 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)             41.7853    23.2798   1.795  0.07536 .  
HDI_log                  7.6095     0.6688  11.378  < 2e-16 ***
GDP_log                  0.9415     0.3551   2.651  0.00919 ** 
Temperature_log_kelvin  -6.4811     4.0903  -1.584  0.11590    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.4 on 112 degrees of freedom
Multiple R-squared:  0.7091,    Adjusted R-squared:  0.7013 
F-statistic: 91.01 on 3 and 112 DF,  p-value: < 2.2e-16

Error functions:

# Residual Sum of Square (RSS)
RSS<-function(Pred,Actual) {
  ss<-sum((Actual-Pred)^2)
  return(ss)
}

# Residual Standard Error (RSE)
RSE<-function(Pred,Real,NumPred) {
  N<-length(Real)-NumPred-1  
  ss<-sqrt((1/N)*RSS(Pred,Real))
  return(ss)
}
# Mean Squared Error 
MSE <- function(Pred,Actual) {
  N<-length(Actual)
  ss<-(1/N)*RSS(Pred,Actual)
  return(ss)
}

# Relative error
RelativeError<-function(Pred,Actual) {
  ss<-sum(abs(Actual-Pred))/sum(abs(Actual))
  return(ss)
}

Prediction:

Pred<-predict(Mod3,t.testing)

Errors:

RSS_Mod3<-RSS(Pred,t.testing$Cases_million_log)
RSE_Mod3<-RSE(Pred,t.testing$Cases_million_log,dim(t.testing)[2]-1)
MSE_Mod3<-MSE(Pred,t.testing$Cases_million_log)
RelativeError_Mod3<-RelativeError(Pred,t.testing$Cases_million_log)

Mod3_Errors<-c(RSS_Mod3,RSE_Mod3,MSE_Mod3,RelativeError_Mod3)

Now, a model without temperature:

t.training <- t.training %>% select(-Temperature_log_kelvin)
t.testing <- t.testing %>% select(-Temperature_log_kelvin)
Mod4<-lm(Cases_million_log~., data=t.training)
summary(Mod4)

Call:
lm(formula = Cases_million_log ~ ., data = t.training)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.9361 -0.7526 -0.0133  0.8424  5.1368 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   4.9167     0.7280   6.754 6.54e-10 ***
HDI_log       8.1355     0.5845  13.919  < 2e-16 ***
GDP_log       1.1227     0.3385   3.317  0.00122 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.41 on 113 degrees of freedom
Multiple R-squared:  0.7026,    Adjusted R-squared:  0.6973 
F-statistic: 133.5 on 2 and 113 DF,  p-value: < 2.2e-16

Prediction:

Pred<-predict(Mod4,t.testing)

Errors:

RSS_Mod4<-RSS(Pred,t.testing$Cases_million_log)
RSE_Mod4<-RSE(Pred,t.testing$Cases_million_log,dim(t.testing)[2]-1)
MSE_Mod4<-MSE(Pred,t.testing$Cases_million_log)
RelativeError_Mod4<-RelativeError(Pred,t.testing$Cases_million_log)

Mod4_Errors<-c(RSS_Mod4,RSE_Mod4,MSE_Mod4,RelativeError_Mod4)

Create a radarplot:

Errors<-rbind(Mod3_Errors,Mod4_Errors)

rownames(Errors)<-c("Model with temperature","Model without temperature")

colnames(Errors)<-c("Residual Sum of Square","Residual Standard Error","Mean Squared Error","Relative error")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("With temperature","Without temperature"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))

Forecast by country

Republic of Costa Rica

Model with temperature

CostaRica_Temp<-read.csv("CostaRica_Temperature.csv",sep=";")
CostaRica_Temp$Date<-as.Date(CostaRica_Temp$Date,format="%d/%m/%Y")
COVID_2_Day_CostaRica_Temp<-COVID_2_Day_CostaRica %>%
  inner_join(CostaRica_Temp,by=c("Date2"="Date"))

COVID_Day_series_CostaRica_Temp<-xts(COVID_2_Day_CostaRica_Temp$World_confirmed, order.by=COVID_2_Day_CostaRica_Temp$Date2)
COVID_series_CostaRica_Temp_Train<-COVID_Day_series_CostaRica_Temp[1:64] #Until March 25th
COVID_series_CostaRica_Temp_Test<-COVID_Day_series_CostaRica_Temp[65:length(COVID_Day_series_CostaRica_Temp)] #From March 26th onwards

Get the lagged difference:

plot(diff(COVID_series_CostaRica_Temp_Train),type="l",main="1st ") 

Correlograms of first diference:

tsdisplay(diff(COVID_series_CostaRica_Temp_Train))

Arima model: ARIMA(1,2,1)

#Auto Arima for Costa Rica:

ARIMA1_CostaRica<-auto.arima(COVID_series_CostaRica_Temp_Train,
                             xreg = COVID_2_Day_CostaRica_Temp$Temperature_CostaRica[1:64])
summary(ARIMA1_CostaRica)
Series: COVID_series_CostaRica_Temp_Train 
Regression with ARIMA(1,2,1) errors 

Coefficients:
          ar1     ma1     xreg
      -0.7967  0.4328  -0.2134
s.e.   0.1462  0.1840   0.1159

sigma^2 estimated as 7.227:  log likelihood=-147.93
AIC=303.86   AICc=304.57   BIC=312.37

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE         ACF1
Training set 0.4281342 2.581091 1.397242 NaN  Inf 0.4379416 -0.009839234

Predictive error functions:

#Relative error
RE <- function(Fore,Actual) {
  return(sum(abs(Fore-Actual))/abs(sum(Actual)))
}


#MAPE
MAPE<-function(Fore,Actual){
  return(
    mean(abs(Actual-Fore)/abs(Actual))*100
    )
}

# mean squared error (MSE)
MSE<-function(Fore,Actual) {
  N<-length(Actual)
  ss<-sum((Actual-Fore)^2)
  return((1/N)*ss)
}

#PFA
PFA <- function(Fore,Actual) {
  Total<-0
  N<-length(Fore)
  for(i in 1:N) {
    if(Fore[i]>=Actual[i])
      Total<-Total+1      
  }
  return(Total/N)
}

Forecast prediction to compare

Test_CostaRica_Temp<-COVID_2_Day_CostaRica_Temp$Temperature_CostaRica[65:length(COVID_Day_series_CostaRica_Temp)]


P_CRI_1<-forecast(ARIMA1_CostaRica,xreg = Test_CostaRica_Temp)

Calculate errors:

RE_CRI_1<-RE(P_CRI_1$mean,as.vector(COVID_series_CostaRica_Temp_Test))
MAPE_CRI_1<-MAPE(P_CRI_1$mean,as.vector(COVID_series_CostaRica_Temp_Test))
MSE_CRI_1<-MSE(P_CRI_1$mean,as.vector(COVID_series_CostaRica_Temp_Test))
PFA_CRI_1<-PFA(P_CRI_1$mean,as.vector(COVID_series_CostaRica_Temp_Test))

Errors.CRI_1<-c(RE_CRI_1,MAPE_CRI_1,MSE_CRI_1,PFA_CRI_1)
Errors.CRI_1
[1]   0.02889533   3.30422945 196.94131428   0.42105263

Manual MAPE:

dd<-data.frame(Actual=as.vector(COVID_series_CostaRica_Temp_Test),
               Fore=P_CRI_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
[1] 3.304229

Model without temperature

ARIMA(1,2,0)

#Auto Arima for Costa Rica:

ARIMA2_CostaRica<-auto.arima(COVID_series_CostaRica_Temp_Train)
summary(ARIMA2_CostaRica)
Series: COVID_series_CostaRica_Temp_Train 
ARIMA(1,2,0) 

Coefficients:
          ar1
      -0.4701
s.e.   0.1133

sigma^2 estimated as 7.607:  log likelihood=-150.5
AIC=304.99   AICc=305.19   BIC=309.24

Training set error measures:
                    ME     RMSE      MAE     MPE     MAPE      MASE       ACF1
Training set 0.5145597 2.692615 1.200452 7.86228 21.76863 0.3762612 0.04160954

Forecast prediction to compare

P_CRI_2<-forecast(ARIMA2_CostaRica,h=length(COVID_series_CostaRica_Temp_Test))

Calculate errors:

RE_CRI_2<-RE(P_CRI_2$mean,as.vector(COVID_series_CostaRica_Temp_Test))
MAPE_CRI_2<-MAPE(P_CRI_2$mean,as.vector(COVID_series_CostaRica_Temp_Test))
MSE_CRI_2<-MSE(P_CRI_2$mean,as.vector(COVID_series_CostaRica_Temp_Test))
PFA_CRI_2<-PFA(P_CRI_2$mean,as.vector(COVID_series_CostaRica_Temp_Test))

Errors.CRI_2<-c(RE_CRI_2,MAPE_CRI_2,MSE_CRI_2,PFA_CRI_2)
Errors.CRI_2
[1]   0.02759212   3.26239839 190.14695712   0.36842105
Errors<-rbind(Errors.CRI_1,Errors.CRI_2)

rownames(Errors)<-c("ARIMAX(1,2,1)","ARIMA(1,2,0)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(1,2,1)","ARIMA(1,2,0)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))

Conclusion: Keep model with temperature

Make model with the overall series

#Auto Arima for Costa Rica:

ARIMA_Final_CostaRica<-auto.arima(COVID_Day_series_CostaRica_Temp,
                             xreg = COVID_2_Day_CostaRica_Temp$Temperature_CostaRica)
summary(ARIMA_Final_CostaRica)
Series: COVID_Day_series_CostaRica_Temp 
Regression with ARIMA(1,1,1) errors 

Coefficients:
         ar1      ma1     xreg
      0.9775  -0.3795  -0.1737
s.e.  0.0216   0.1349   0.1794

sigma^2 estimated as 18.62:  log likelihood=-235.89
AIC=479.78   AICc=480.3   BIC=489.41

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE       ACF1
Training set 0.5865101 4.209981 2.336628 NaN  Inf 0.3130776 0.03301146

Final forecast:

Future_CostaRica_Temp<-CostaRica_Temp$Temperature_CostaRica[CostaRica_Temp$Date>max(COVID_2$Date2)]

P_CRI_Final<-forecast(ARIMA_Final_CostaRica,xreg = Future_CostaRica_Temp)
Low_lim_CRI<-data.frame(P_CRI_Final$lower)[,2]
Upp_lim_CRI<-data.frame(P_CRI_Final$upper)[,2]

For making the plot:

##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_CostaRica_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_CostaRica_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_CostaRica_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_CRI <- xts(Low_lim_CRI,order.by=per_2)
Forecast_CRI <- xts(P_CRI_Final$mean,order.by=per_2)
Upp_lim_CRI <- xts(Upp_lim_CRI,order.by=per_2)
predNA <- rep(NA, length(Forecast_CRI))
B <- cbind(predNA, Low_lim_CRI, Forecast_CRI, Upp_lim_CRI)

all_series_CRI <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_CRI) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
dygraph(all_series_CRI, main="SARS-COV2-outbreak: Total Costa Rica cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(189/255,44/255,47/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(10/255,44/255,119/255)) %>% 
  dyRangeSelector()

NA

Mexico

Model with temperature

Mexico_Temp<-read.csv("Mexico_Temperature.csv",sep=";")
Mexico_Temp$Date<-as.Date(Mexico_Temp$Date,format="%d/%m/%Y")
COVID_2_Day_Mexico<- COVID_2 %>% 
  filter(Country.Region %in% c("Mexico")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Mexico_Temp<-COVID_2_Day_Mexico %>%
  inner_join(Mexico_Temp,by=c("Date2"="Date"))

COVID_Day_series_Mexico_Temp<-xts(COVID_2_Day_Mexico_Temp$World_confirmed, order.by=COVID_2_Day_Mexico_Temp$Date2)
COVID_series_Mexico_Train<-COVID_Day_series_Mexico_Temp[1:74] #Until April 4th
COVID_series_Mexico_Test<-COVID_Day_series_Mexico_Temp[75:length(COVID_Day_series_Mexico_Temp)] #From April 5th onwards

Get the lagged difference:

plot(diff(COVID_series_Mexico_Train),type="l",main="1st ") 

Correlograms of first diference:

tsdisplay(diff(COVID_series_Mexico_Train))

Arima model: ARIMA(2,2,3)

#Auto Arima for Mexico:

#ARIMA1_Mexico<-auto.arima(COVID_series_Mexico_Train,
                             #xreg = COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Mexico)
Series: COVID_series_Mexico_Train 
Regression with ARIMA(2,2,3) errors 

Coefficients:
          ar1      ar2     ma1     ma2     ma3    xreg
      -0.9027  -0.7333  1.1212  0.8938  0.6734  0.0623
s.e.   0.1121   0.1592  0.1093  0.2027  0.1153  0.2567

sigma^2 estimated as 123.3:  log likelihood=-273.92
AIC=561.84   AICc=563.59   BIC=577.78

Training set error measures:
                   ME     RMSE      MAE MPE MAPE      MASE       ACF1
Training set 1.716467 10.48796 5.177678 NaN  Inf 0.2239162 -0.1047828

Forecast prediction to compare

Test_Mexico_Temp<-COVID_2_Day_Mexico_Temp$Temperature_Mexico[75:length(COVID_Day_series_Mexico_Temp)]


P_MEX_1<-forecast(ARIMA1_Mexico,xreg = Test_Mexico_Temp)

Calculate errors:

RE_MEX_1<-RE(P_MEX_1$mean,as.vector(COVID_series_Mexico_Test))
MAPE_MEX_1<-MAPE(P_MEX_1$mean,as.vector(COVID_series_Mexico_Test))
MSE_MEX_1<-MSE(P_MEX_1$mean,as.vector(COVID_series_Mexico_Test))
PFA_MEX_1<-PFA(P_MEX_1$mean,as.vector(COVID_series_Mexico_Test))

Errors.MEX_1<-c(RE_MEX_1,MAPE_MEX_1,MSE_MEX_1,PFA_MEX_1)
Errors.MEX_1
[1] 1.889868e-01 1.632085e+01 5.534272e+05 0.000000e+00

Manual MAPE:

dd<-data.frame(Actual=as.vector(COVID_series_Mexico_Test),
               Fore=P_MEX_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
[1] 16.32085

Model without temperature

ARIMA(2,2,4)

#Auto Arima for Mexico:

ARIMA2_Mexico<-auto.arima(COVID_series_Mexico_Train)
summary(ARIMA2_Mexico)
Series: COVID_series_Mexico_Train 
ARIMA(2,2,4) 

Coefficients:
          ar1      ar2     ma1     ma2     ma3      ma4
      -0.7232  -0.7137  0.8661  0.6669  0.4019  -0.3260
s.e.   0.1415   0.1688  0.1697  0.2135  0.1529   0.1463

sigma^2 estimated as 116.5:  log likelihood=-272.23
AIC=558.45   AICc=560.2   BIC=574.39

Training set error measures:
                   ME     RMSE     MAE      MPE     MAPE      MASE        ACF1
Training set 2.187346 10.19436 4.88144 4.696633 18.33364 0.2111049 -0.05173888

Forecast prediction to compare

P_MEX_2<-forecast(ARIMA2_Mexico,h=length(COVID_series_Mexico_Test))

Calculate errors:

RE_MEX_2<-RE(P_MEX_2$mean,as.vector(COVID_series_Mexico_Test))
MAPE_MEX_2<-MAPE(P_MEX_2$mean,as.vector(COVID_series_Mexico_Test))
MSE_MEX_2<-MSE(P_MEX_2$mean,as.vector(COVID_series_Mexico_Test))
PFA_MEX_2<-PFA(P_MEX_2$mean,as.vector(COVID_series_Mexico_Test))

Errors.MEX_2<-c(RE_MEX_2,MAPE_MEX_2,MSE_MEX_2,PFA_MEX_2)
Errors.MEX_2
[1] 1.939056e-01 1.673569e+01 5.832504e+05 0.000000e+00
Errors<-rbind(Errors.MEX_1,Errors.MEX_2)

rownames(Errors)<-c("ARIMAX(2,2,3)","ARIMA(2,2,4)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(2,2,3)","ARIMA(2,2,4)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))

Conclusion: Keep model with temperature

Make model with the overall series

#Auto Arima for Mexico:

ARIMA_Final_Mexico<-auto.arima(COVID_Day_series_Mexico_Temp,
                             xreg = COVID_2_Day_Mexico_Temp$Temperature_Mexico)
summary(ARIMA_Final_Mexico)
Series: COVID_Day_series_Mexico_Temp 
Regression with ARIMA(2,2,1) errors 

Coefficients:
         ar1     ar2      ma1     xreg
      0.4489  0.5325  -0.8817  -0.2137
s.e.  0.0992  0.0967   0.0531   0.9974

sigma^2 estimated as 618.9:  log likelihood=-373.91
AIC=757.82   AICc=758.62   BIC=769.79

Training set error measures:
                   ME     RMSE      MAE MPE MAPE      MASE       ACF1
Training set 3.719845 23.96071 10.93701 NaN  Inf 0.1924126 -0.0546056

Final forecast:

Future_Mexico_Temp<-Mexico_Temp$Temperature_Mexico[Mexico_Temp$Date>max(COVID_2$Date2)]

P_MEX_Final<-forecast(ARIMA_Final_Mexico,xreg = Future_Mexico_Temp)
Low_lim_MEX<-data.frame(P_MEX_Final$lower)[,2]
Upp_lim_MEX<-data.frame(P_MEX_Final$upper)[,2]

For making the plot:

##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Mexico_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Mexico_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Mexico_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_MEX <- xts(Low_lim_MEX,order.by=per_2)
Forecast_MEX <- xts(P_MEX_Final$mean,order.by=per_2)
Upp_lim_MEX <- xts(Upp_lim_MEX,order.by=per_2)
predNA <- rep(NA, length(Forecast_MEX))
B <- cbind(predNA, Low_lim_MEX, Forecast_MEX, Upp_lim_MEX)

all_series_MEX <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_MEX) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
dygraph(all_series_MEX, main="SARS-COV2-outbreak: Total Mexico cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(189/255,44/255,47/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(43/255,102/255,73/255)) %>% 
  dyRangeSelector()

NA

Italy

Model with temperature

Italy_Temp<-read.csv("Italy_Temperature.csv",sep=";")
Italy_Temp$Date<-as.Date(Italy_Temp$Date,format="%d/%m/%Y")
COVID_2_Day_Italy<- COVID_2 %>% 
  filter(Country.Region %in% c("Italy")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Italy_Temp<-COVID_2_Day_Italy %>%
  inner_join(Italy_Temp,by=c("Date2"="Date"))

COVID_Day_series_Italy_Temp<-xts(COVID_2_Day_Italy_Temp$World_confirmed, order.by=COVID_2_Day_Italy_Temp$Date2)
COVID_Day_series_Italy_Train<-COVID_Day_series_Italy_Temp[1:64] #Until March 25th
COVID_Day_series_Italy_Test<-COVID_Day_series_Italy_Temp[65:length(COVID_Day_series_Italy_Temp)] #From March 26th onwards

Get the lagged difference:

plot(diff(COVID_Day_series_Italy_Train),type="l",main="1st ") 

Correlograms of first diference:

tsdisplay(diff(COVID_Day_series_Italy_Train))

Arima model: ARIMA(1,2,0)

#Auto Arima for Italy:

ARIMA1_Italy<-auto.arima(COVID_Day_series_Italy_Train,
                             xreg = COVID_2_Day_Italy_Temp$Temperature_Italy[1:64])
#ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Italy)
Series: COVID_Day_series_Italy_Train 
Regression with ARIMA(1,2,0) errors 

Coefficients:
          ar1     xreg
      -0.5094  21.4065
s.e.   0.1119  20.2612

sigma^2 estimated as 489396:  log likelihood=-493.24
AIC=992.47   AICc=992.89   BIC=998.85

Training set error measures:
                   ME     RMSE      MAE MPE MAPE      MASE         ACF1
Training set 123.6808 677.3545 335.2238 NaN  Inf 0.2839123 -0.005148955

Forecast prediction to compare

Test_Italy_Temp<-COVID_2_Day_Italy_Temp$Temperature_Italy[65:length(COVID_Day_series_Italy_Temp)]


P_ITA_1<-forecast(ARIMA1_Italy,xreg = Test_Italy_Temp)

Calculate errors:

RE_ITA_1<-RE(P_ITA_1$mean,as.vector(COVID_Day_series_Italy_Test))
MAPE_ITA_1<-MAPE(P_ITA_1$mean,as.vector(COVID_Day_series_Italy_Test))
MSE_ITA_1<-MSE(P_ITA_1$mean,as.vector(COVID_Day_series_Italy_Test))
PFA_ITA_1<-PFA(P_ITA_1$mean,as.vector(COVID_Day_series_Italy_Test))

Errors.ITA_1<-c(RE_ITA_1,MAPE_ITA_1,MSE_ITA_1,PFA_ITA_1)
Errors.ITA_1
[1] 4.208045e-02 3.728108e+00 4.844373e+07 7.368421e-01

Manual MAPE:

dd<-data.frame(Actual=as.vector(COVID_Day_series_Italy_Test),
               Fore=P_ITA_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
[1] 3.728108

Model without temperature

ARIMA(1,2,0)

#Auto Arima for Italy:
ARIMA2_Italy<-auto.arima(COVID_Day_series_Italy_Train)
summary(ARIMA2_Italy)
Series: COVID_Day_series_Italy_Train 
ARIMA(1,2,0) 

Coefficients:
          ar1
      -0.5414
s.e.   0.1046

sigma^2 estimated as 489700:  log likelihood=-493.79
AIC=991.58   AICc=991.79   BIC=995.84

Training set error measures:
                   ME     RMSE     MAE      MPE     MAPE      MASE        ACF1
Training set 125.8084 683.1877 295.442 5.335548 11.24576 0.2502197 -0.02348722

Forecast prediction to compare

P_ITA_2<-forecast(ARIMA2_Italy,h=length(COVID_Day_series_Italy_Test))

Calculate errors:

RE_ITA_2<-RE(P_ITA_2$mean,as.vector(COVID_Day_series_Italy_Test))
MAPE_ITA_2<-MAPE(P_ITA_2$mean,as.vector(COVID_Day_series_Italy_Test))
MSE_ITA_2<-MSE(P_ITA_2$mean,as.vector(COVID_Day_series_Italy_Test))
PFA_ITA_2<-PFA(P_ITA_2$mean,as.vector(COVID_Day_series_Italy_Test))

Errors.ITA_2<-c(RE_ITA_2,MAPE_ITA_2,MSE_ITA_2,PFA_ITA_2)
Errors.ITA_2
[1] 3.967952e-02 3.531253e+00 4.311796e+07 6.842105e-01
Errors<-rbind(Errors.ITA_1,Errors.ITA_2)

rownames(Errors)<-c("ARIMAX(1,2,0)","ARIMA(1,2,0)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(1,2,0)","ARIMA(1,2,0)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))

Conclusion: Keep model without temperature

Make model with the overall series

#Auto Arima for Italy:

ARIMA_Final_Italy<-auto.arima(COVID_Day_series_Italy_Temp)
summary(ARIMA_Final_Italy)
Series: COVID_Day_series_Italy_Temp 
ARIMA(1,2,0) 

Coefficients:
          ar1
      -0.4579
s.e.   0.0987

sigma^2 estimated as 494348:  log likelihood=-645.54
AIC=1295.09   AICc=1295.24   BIC=1299.88

Training set error measures:
                   ME    RMSE      MAE     MPE     MAPE     MASE       ACF1
Training set 60.56058 690.275 354.0068 3.79996 8.394382 0.181979 0.00588171

Final forecast:

Future_Italy_Temp<-Italy_Temp$Temperature_Italy[Italy_Temp$Date>max(COVID_2$Date2)]

P_ITA_Final<-forecast(ARIMA_Final_Italy,h=length(Future_Italy_Temp))
Low_lim_ITA<-data.frame(P_ITA_Final$lower)[,2]
Upp_lim_ITA<-data.frame(P_ITA_Final$upper)[,2]

For making the plot:

##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Italy_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Italy_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Italy_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_ITA <- xts(Low_lim_ITA,order.by=per_2)
Forecast_ITA <- xts(P_ITA_Final$mean,order.by=per_2)
Upp_lim_ITA <- xts(Upp_lim_ITA,order.by=per_2)
predNA <- rep(NA, length(Forecast_ITA))
B <- cbind(predNA, Low_lim_ITA, Forecast_ITA, Upp_lim_ITA)

all_series_ITA <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_ITA) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
dygraph(all_series_ITA, main="SARS-COV2-outbreak: Total Italy cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(190/255,59/255,61/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(64/255,143/255,78/255)) %>% 
  dyRangeSelector()

NA

Lebanon

Model with temperature

Lebanon_Temp<-read.csv("Lebanon_Temperature.csv",sep=";")
Lebanon_Temp$Date<-as.Date(Lebanon_Temp$Date,format="%d/%m/%Y")
COVID_2_Day_Lebanon<- COVID_2 %>% 
  filter(Country.Region %in% c("Lebanon")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Lebanon_Temp<-COVID_2_Day_Lebanon %>%
  inner_join(Lebanon_Temp,by=c("Date2"="Date"))

COVID_Day_series_Lebanon_Temp<-xts(COVID_2_Day_Lebanon_Temp$World_confirmed, order.by=COVID_2_Day_Lebanon_Temp$Date2)
COVID_series_Lebanon_Train<-COVID_Day_series_Lebanon_Temp[1:74] #Until April 4th
COVID_series_Lebanon_Test<-COVID_Day_series_Lebanon_Temp[75:length(COVID_Day_series_Lebanon_Temp)] #From April 4th onwards

Get the lagged difference:

plot(diff(COVID_series_Lebanon_Train),type="l",main="1st ") 

Correlograms of first diference:

tsdisplay(diff(COVID_series_Lebanon_Train))

Arima model: ARIMA(1,1,2)

#Auto Arima for Lebanon:

ARIMA1_Lebanon<-auto.arima(COVID_series_Lebanon_Train,
                             xreg = COVID_2_Day_Lebanon_Temp$Temperature_Lebanon[1:74])
#ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Lebanon)
Series: COVID_series_Lebanon_Train 
Regression with ARIMA(1,1,2) errors 

Coefficients:
         ar1      ma1     ma2    xreg
      0.9216  -0.8504  0.4970  0.0461
s.e.  0.0488   0.0989  0.1263  0.2338

sigma^2 estimated as 60.69:  log likelihood=-252.36
AIC=514.72   AICc=515.62   BIC=526.17

Training set error measures:
                   ME     RMSE      MAE MPE MAPE      MASE        ACF1
Training set 1.101941 7.522299 3.444682 NaN  Inf 0.4835803 -0.03853757

Forecast prediction to compare

Test_Lebanon_Temp<-COVID_2_Day_Lebanon_Temp$Temperature_Lebanon[75:length(COVID_Day_series_Lebanon_Temp)]


P_LBN_1<-forecast(ARIMA1_Lebanon,xreg = Test_Lebanon_Temp)

Calculate errors:

RE_LBN_1<-RE(P_LBN_1$mean,as.vector(COVID_series_Lebanon_Test))
MAPE_LBN_1<-MAPE(P_LBN_1$mean,as.vector(COVID_series_Lebanon_Test))
MSE_LBN_1<-MSE(P_LBN_1$mean,as.vector(COVID_series_Lebanon_Test))
PFA_LBN_1<-PFA(P_LBN_1$mean,as.vector(COVID_series_Lebanon_Test))

Errors.LBN_1<-c(RE_LBN_1,MAPE_LBN_1,MSE_LBN_1,PFA_LBN_1)
Errors.LBN_1
[1]   0.03207225   3.07734487 532.24875637   0.33333333

Manual MAPE:

dd<-data.frame(Actual=as.vector(COVID_series_Lebanon_Test),
               Fore=P_LBN_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
[1] 3.077345

Model without temperature

ARIMA(0,2,2)

#Auto Arima for Lebanon:
ARIMA2_Lebanon<-auto.arima(COVID_series_Lebanon_Train)
summary(ARIMA2_Lebanon)
Series: COVID_series_Lebanon_Train 
ARIMA(0,2,2) 

Coefficients:
          ma1     ma2
      -0.8655  0.4654
s.e.   0.0939  0.1264

sigma^2 estimated as 61.55:  log likelihood=-249.92
AIC=505.84   AICc=506.19   BIC=512.67

Training set error measures:
                   ME     RMSE      MAE      MPE     MAPE      MASE        ACF1
Training set 0.286841 7.630167 3.533028 2.334374 15.75454 0.4959827 -0.05944259

Forecast prediction to compare

P_LBN_2<-forecast(ARIMA2_Lebanon,h=length(COVID_series_Lebanon_Test))

Calculate errors:

RE_LBN_2<-RE(P_LBN_2$mean,as.vector(COVID_series_Lebanon_Test))
MAPE_LBN_2<-MAPE(P_LBN_2$mean,as.vector(COVID_series_Lebanon_Test))
MSE_LBN_2<-MSE(P_LBN_2$mean,as.vector(COVID_series_Lebanon_Test))
PFA_LBN_2<-PFA(P_LBN_2$mean,as.vector(COVID_series_Lebanon_Test))

Errors.LBN_2<-c(RE_LBN_2,MAPE_LBN_2,MSE_LBN_2,PFA_LBN_2)
Errors.LBN_2
[1]  0.01150777  1.15133297 56.61087377  0.55555556
Errors<-rbind(Errors.LBN_1,Errors.LBN_2)

rownames(Errors)<-c("ARIMAX(1,1,2)","ARIMA(0,2,2)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(1,1,2)","ARIMA(0,2,2)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))

Conclusion: Keep model without temperature

Make model with the overall series

#Auto Arima for Lebanon:

ARIMA_Final_Lebanon<-auto.arima(COVID_Day_series_Lebanon_Temp)
summary(ARIMA_Final_Lebanon)
Series: COVID_Day_series_Lebanon_Temp 
ARIMA(2,2,0) 

Coefficients:
          ar1      ar2
      -0.9016  -0.2596
s.e.   0.1070   0.1064

sigma^2 estimated as 62.97:  log likelihood=-282.13
AIC=570.25   AICc=570.57   BIC=577.44

Training set error measures:
                    ME     RMSE      MAE      MPE     MAPE      MASE        ACF1
Training set 0.1748628 7.741674 3.821745 2.495435 12.97023 0.4958593 -0.02103357

Final forecast:

Future_Lebanon_Temp<-Lebanon_Temp$Temperature_Lebanon[Lebanon_Temp$Date>max(COVID_2$Date2)]

P_LBN_Final<-forecast(ARIMA_Final_Lebanon,h=length(Future_Lebanon_Temp))
Low_lim_LBN<-data.frame(P_LBN_Final$lower)[,2]
Upp_lim_LBN<-data.frame(P_LBN_Final$upper)[,2]

For making the plot:

##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Lebanon_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Lebanon_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Lebanon_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_LBN <- xts(Low_lim_LBN,order.by=per_2)
Forecast_LBN <- xts(P_LBN_Final$mean,order.by=per_2)
Upp_lim_LBN <- xts(Upp_lim_LBN,order.by=per_2)
predNA <- rep(NA, length(Forecast_ITA))
B <- cbind(predNA, Low_lim_LBN, Forecast_LBN, Upp_lim_LBN)

all_series_LBN <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_LBN) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
dygraph(all_series_LBN, main="SARS-COV2-outbreak: Total Lebanon cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(218/255,55/255,50/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(73/255,163/255,90/255)) %>% 
  dyRangeSelector()

NA

Colombia

Model with temperature

Colombia_Temp<-read.csv("Colombia_Temperature.csv",sep=";")
Colombia_Temp$Date<-as.Date(Colombia_Temp$Date,format="%d/%m/%Y")
COVID_2_Day_Colombia<- COVID_2 %>% 
  filter(Country.Region %in% c("Colombia")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Colombia_Temp<-COVID_2_Day_Colombia %>%
  inner_join(Colombia_Temp,by=c("Date2"="Date"))

COVID_Day_series_Colombia_Temp<-xts(COVID_2_Day_Colombia_Temp$World_confirmed, order.by=COVID_2_Day_Colombia_Temp$Date2)
COVID_series_Colombia_Train<-COVID_Day_series_Colombia_Temp[1:74] #Until April 4th
COVID_series_Colombia_Test<-COVID_Day_series_Colombia_Temp[75:length(COVID_Day_series_Colombia_Temp)] #From April 5th onwards

Get the lagged difference:

plot(diff(COVID_series_Colombia_Train),type="l",main="1st ") 

Correlograms of first diference:

tsdisplay(diff(COVID_series_Colombia_Train))

Arima model: ARIMA(2,2,2)

#Auto Arima for Colombia:

ARIMA1_Colombia<-auto.arima(COVID_series_Colombia_Train,
                             xreg = COVID_2_Day_Colombia_Temp$Temperature_Colombia[1:74])
#ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Colombia)
Series: COVID_series_Colombia_Train 
Regression with ARIMA(2,2,2) errors 

Coefficients:
          ar1      ar2      ma1     ma2     xreg
      -0.2675  -0.8894  -0.0039  0.6242  -0.2343
s.e.   0.1037   0.1016   0.1495  0.1732   0.5167

sigma^2 estimated as 217.7:  log likelihood=-294.03
AIC=600.06   AICc=601.35   BIC=613.72

Training set error measures:
                   ME     RMSE      MAE MPE MAPE      MASE        ACF1
Training set 2.197353 14.04005 6.409043 NaN  Inf 0.3327597 -0.07966735

Forecast prediction to compare

Test_Colombia_Temp<-COVID_2_Day_Colombia_Temp$Temperature_Colombia[75:length(COVID_Day_series_Colombia_Temp)]


P_COL_1<-forecast(ARIMA1_Colombia,xreg = Test_Colombia_Temp)

Calculate errors:

RE_COL_1<-RE(P_COL_1$mean,as.vector(COVID_series_Colombia_Test))
MAPE_COL_1<-MAPE(P_COL_1$mean,as.vector(COVID_series_Colombia_Test))
MSE_COL_1<-MSE(P_COL_1$mean,as.vector(COVID_series_Colombia_Test))
PFA_COL_1<-PFA(P_COL_1$mean,as.vector(COVID_series_Colombia_Test))

Errors.COL_1<-c(RE_COL_1,MAPE_COL_1,PFA_COL_1,MSE_COL_1)
Errors.COL_1
[1] 1.013233e-01 9.089187e+00 2.222222e-01 7.381917e+04

Manual MAPE:

dd<-data.frame(Actual=as.vector(COVID_series_Colombia_Test),
               Fore=P_COL_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
[1] 9.089187

Model without temperature

ARIMA(2,2,2)

#Auto Arima for Colombia:
ARIMA2_Colombia<-auto.arima(COVID_series_Colombia_Train)
summary(ARIMA2_Colombia)
Series: COVID_series_Colombia_Train 
ARIMA(2,2,2) 

Coefficients:
          ar1      ar2     ma1     ma2
      -0.2672  -0.8913  0.0014  0.6244
s.e.   0.1037   0.1025  0.1486  0.1753

sigma^2 estimated as 215.1:  log likelihood=-294.13
AIC=598.26   AICc=599.17   BIC=609.65

Training set error measures:
                   ME     RMSE      MAE      MPE     MAPE      MASE        ACF1
Training set 2.199355 14.05992 6.130841 5.459672 19.97005 0.3183153 -0.08053967

Forecast prediction to compare

P_COL_2<-forecast(ARIMA2_Colombia,h=length(COVID_series_Colombia_Test))

Calculate errors:

RE_COL_2<-RE(P_COL_2$mean,as.vector(COVID_series_Colombia_Test))
MAPE_COL_2<-MAPE(P_COL_2$mean,as.vector(COVID_series_Colombia_Test))
MSE_COL_2<-MSE(P_COL_2$mean,as.vector(COVID_series_Colombia_Test))
PFA_COL_2<-PFA(P_COL_2$mean,as.vector(COVID_series_Colombia_Test))

Errors.COL_2<-c(RE_COL_2,MAPE_COL_2,MSE_COL_2,PFA_COL_2)
Errors.COL_2
[1] 1.008711e-01 9.052198e+00 7.314569e+04 2.222222e-01
Errors<-rbind(Errors.COL_1,Errors.COL_2)

rownames(Errors)<-c("ARIMAX(2,2,2)","ARIMA(2,2,2)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(2,2,2)","ARIMA(2,2,2)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))

Conclusion: Keep model without temperature

Make model with the overall series

#Auto Arima for Colombia:

ARIMA_Final_Colombia<-auto.arima(COVID_Day_series_Colombia_Temp)
summary(ARIMA_Final_Colombia)
Series: COVID_Day_series_Colombia_Temp 
ARIMA(4,2,1) 

Coefficients:
         ar1      ar2     ar3      ar4      ma1
      0.5647  -0.4749  0.5634  -0.5138  -0.6402
s.e.  0.1456   0.1196  0.1304   0.1189   0.1227

sigma^2 estimated as 706.6:  log likelihood=-378.97
AIC=769.93   AICc=771.07   BIC=784.3

Training set error measures:
                   ME     RMSE      MAE     MPE     MAPE      MASE       ACF1
Training set 3.992104 25.43636 12.38659 5.38642 19.71782 0.3561361 -0.1491261

Final forecast:

Future_Colombia_Temp<-Colombia_Temp$Temperature_Colombia[Colombia_Temp$Date>max(COVID_2$Date2)]

P_COL_Final<-forecast(ARIMA_Final_Colombia,h=length(Future_Colombia_Temp))
Low_lim_COL<-data.frame(P_COL_Final$lower)[,2]
Upp_lim_COL<-data.frame(P_COL_Final$upper)[,2]

For making the plot:

##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Colombia_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Colombia_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Colombia_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_COL <- xts(Low_lim_COL,order.by=per_2)
Forecast_COL <- xts(P_COL_Final$mean,order.by=per_2)
Upp_lim_COL <- xts(Upp_lim_COL,order.by=per_2)
predNA <- rep(NA, length(Forecast_COL))
B <- cbind(predNA, Low_lim_COL, Forecast_COL, Upp_lim_COL)

all_series_COL <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_COL) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
dygraph(all_series_COL, main="SARS-COV2-outbreak: Total Colombia cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(17/255,57/255,141/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(246/255,209/255,75/255)) %>% 
  dyRangeSelector()

NA

Chile

Model with temperature

Chile_Temp<-read.csv("Chile_Temperature.csv",sep=";")
Chile_Temp$Date<-as.Date(Chile_Temp$Date,format="%d/%m/%Y")
COVID_2_Day_Chile<- COVID_2 %>% 
  filter(Country.Region %in% c("Chile")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Chile_Temp<-COVID_2_Day_Chile %>%
  inner_join(Chile_Temp,by=c("Date2"="Date"))

COVID_Day_series_Chile_Temp<-xts(COVID_2_Day_Chile_Temp$World_confirmed, order.by=COVID_2_Day_Chile_Temp$Date2)
COVID_series_Chile_Train<-COVID_Day_series_Chile_Temp[1:76] #Until April 6th
COVID_series_Chile_Test<-COVID_Day_series_Chile_Temp[77:length(COVID_Day_series_Chile_Temp)] #From April 7th onwards

Get the lagged difference:

plot(diff(COVID_series_Chile_Train),type="l",main="1st ") 

Correlograms of first diference:

tsdisplay(diff(COVID_series_Chile_Train))

Arima model: ARIMA(1,1,0)

#Auto Arima for Chile:

ARIMA1_Chile<-auto.arima(COVID_series_Chile_Train,
                             xreg = COVID_2_Day_Chile_Temp$Temperature_Chile[1:76])
#ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Chile)
Series: COVID_series_Chile_Train 
Regression with ARIMA(1,1,0) errors 

Coefficients:
         ar1     xreg
      0.9734  -1.3786
s.e.  0.0265   1.2712

sigma^2 estimated as 1708:  log likelihood=-386
AIC=778.01   AICc=778.35   BIC=784.96

Training set error measures:
                   ME     RMSE      MAE MPE MAPE      MASE       ACF1
Training set 6.143615 40.50828 20.18216 NaN  Inf 0.3143638 -0.5175849

Forecast prediction to compare

Test_Chile_Temp<-COVID_2_Day_Chile_Temp$Temperature_Chile[77:length(COVID_Day_series_Chile_Temp)]


P_CHL_1<-forecast(ARIMA1_Chile,xreg = Test_Chile_Temp)

Calculate errors:

RE_CHL_1<-RE(P_CHL_1$mean,as.vector(COVID_series_Chile_Test))
MAPE_CHL_1<-MAPE(P_CHL_1$mean,as.vector(COVID_series_Chile_Test))
MSE_CHL_1<-MSE(P_CHL_1$mean,as.vector(COVID_series_Chile_Test))
PFA_CHL_1<-PFA(P_CHL_1$mean,as.vector(COVID_series_Chile_Test))

Errors.CHL_1<-c(RE_CHL_1,MAPE_CHL_1,MSE_CHL_1,PFA_CHL_1)
Errors.CHL_1
[1] 4.702935e-02 4.379044e+00 1.290917e+05 1.428571e-01

Manual MAPE:

dd<-data.frame(Actual=as.vector(COVID_series_Chile_Test),
               Fore=P_CHL_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
[1] 4.379044

Model without temperature

ARIMA(1,2,4)

#Auto Arima for Colombia:
ARIMA2_Chile<-auto.arima(COVID_series_Chile_Train)
summary(ARIMA2_Chile)
Series: COVID_series_Chile_Train 
ARIMA(1,2,4) 

Coefficients:
         ar1      ma1     ma2     ma3      ma4
      0.8988  -1.8812  1.0871  0.0198  -0.0671
s.e.  0.0654   0.1487  0.2748  0.2611   0.1410

sigma^2 estimated as 843.5:  log likelihood=-354.47
AIC=720.93   AICc=722.19   BIC=734.76

Training set error measures:
                   ME     RMSE      MAE      MPE    MAPE     MASE        ACF1
Training set 2.726442 27.67343 12.61158 8.098567 15.1208 0.196442 -0.02684075

Forecast prediction to compare

P_CHL_2<-forecast(ARIMA2_Chile,h=length(COVID_series_Chile_Test))

Calculate errors:

RE_CHL_2<-RE(P_CHL_2$mean,as.vector(COVID_series_Chile_Test))
MAPE_CHL_2<-MAPE(P_CHL_2$mean,as.vector(COVID_series_Chile_Test))
MSE_CHL_2<-MSE(P_CHL_2$mean,as.vector(COVID_series_Chile_Test))
PFA_CHL_2<-PFA(P_CHL_2$mean,as.vector(COVID_series_Chile_Test))

Errors.CHL_2<-c(RE_CHL_2,MAPE_CHL_2,MSE_CHL_2,PFA_CHL_2)
Errors.CHL_2
[1] 2.831325e-02 2.665247e+00 4.677021e+04 1.428571e-01
Errors<-rbind(Errors.CHL_1,Errors.CHL_2)

rownames(Errors)<-c("ARIMAX(1,1,0)","ARIMA(1,2,4)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(1,1,0)","ARIMA(1,2,4)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))

Conclusion: Keep model without temperature

Make model with the overall series

#Auto Arima for Chile:

ARIMA_Final_Chile<-auto.arima(COVID_Day_series_Chile_Temp)
summary(ARIMA_Final_Chile)
Series: COVID_Day_series_Chile_Temp 
ARIMA(0,2,1) 

Coefficients:
          ma1
      -0.4303
s.e.   0.1014

sigma^2 estimated as 1960:  log likelihood=-421.54
AIC=847.08   AICc=847.24   BIC=851.87

Training set error measures:
                   ME   RMSE      MAE      MPE    MAPE     MASE       ACF1
Training set 6.995765 43.459 20.42083 5.151059 16.0157 0.222526 -0.0332737

Final forecast:

Future_Chile_Temp<-Chile_Temp$Temperature_Chile[Chile_Temp$Date>max(COVID_2$Date2)]

P_CHL_Final<-forecast(ARIMA_Final_Chile,h=length(Future_Chile_Temp))
Low_lim_CHL<-data.frame(P_CHL_Final$lower)[,2]
Upp_lim_CHL<-data.frame(P_CHL_Final$upper)[,2]

For making the plot:

##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Chile_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Chile_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Chile_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_CHL <- xts(Low_lim_CHL,order.by=per_2)
Forecast_CHL <- xts(P_CHL_Final$mean,order.by=per_2)
Upp_lim_CHL <- xts(Upp_lim_CHL,order.by=per_2)
predNA <- rep(NA, length(Forecast_CHL))
B <- cbind(predNA, Low_lim_CHL, Forecast_CHL, Upp_lim_CHL)

all_series_CHL <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_CHL) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
dygraph(all_series_CHL, main="SARS-COV2-outbreak: Total Chile cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(196/255,60/255,44/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(16/255,59/255,160/255)) %>% 
  dyRangeSelector()

NA
#Forecasts<-list(all_series_CRI,all_series_MEX,all_series_ITA,all_series_LBN,all_series_COL,
                #all_series_CHL)
#names(Forecasts)<-c("Costa Rica","Mexico","Italy","Lebanon","Colombia","Chile")
#Forecasts$Mexico

#save(Forecasts, file="Forecasts.RData")
---
title: "COVID-19 Outbreak: Worldwide analysis"
author: "Aoun, Camargo, Martinez, Rodriguez"
output: 
  html_notebook:
    toc: true
    toc_depth: 3
    toc_float:
      collapsed: true
      smooth_scroll: true
    theme: cosmo
     
---
![](Coronavirus.jpg)

# Quick overview

## Current status


```{r,message=FALSE,warning=FALSE}
#library(nCov2019)
library(leaflet)
library(dplyr)
library(ggplot2)
library(plotly)
library(scales)
library(xts)
library(dygraphs)
library(corrplot)
library(lubridate)
library(fmsb)
library(forecast)
```

```{r}
COVID<-read.csv("covid_19_data.csv")
COVID_2<-read.csv("COVID19_13-Apr.csv")
```

Format date:
```{r}
Date<-as.Date(COVID_2$Date, format="%m/%d/%y") 

COVID_2$Date2<-Date
```

```{r}
COVID_updated<-COVID_2 %>% filter(Date2==max(Date2))
```

```{r,warning=FALSE,message=FALSE}
leaflet(width = "100%") %>% 
  addProviderTiles("CartoDB.DarkMatter") %>% 
  setView(lng = 0, lat = 10, zoom = 1.5) %>% 
  addCircleMarkers(data = COVID_updated, 
                   lng = ~ Long,
                   lat = ~ Lat,
                   radius = ~ log(Confirmed+1),
                   color = rgb(218/255,65/255,56/255),
                   fillOpacity = ~ ifelse(Confirmed > 0, 1, 0),
                   stroke = FALSE,
                   label = ~ paste(Province.State,",",Country.Region, ": ", Confirmed)
                   )
```

Current top 10 countries:
```{r}
COVID_top<-COVID_2 %>% filter(Date2==max(Date2)) %>% 
  group_by(Country.Region) %>% summarise(Total_confirmed=sum(Confirmed)) %>% 
  top_n(10,Total_confirmed) %>% arrange(desc(Total_confirmed))
```

```{r}
plot<-ggplot(data=COVID_top
       , aes(x=Total_confirmed,y=reorder(Country.Region,Total_confirmed))) +
  geom_bar(stat ="identity",alpha=0.8,fill="firebrick3") +
  geom_text(aes(label=Total_confirmed), vjust=0.5, hjust=0.9,color="black", size=3.5) +
  scale_x_continuous(labels = comma) +
  labs(title = paste("Top 10 countries with confirmed cases as of ",max(COVID_2$Date2)),
       x = "Confirmed cases",
       y = "Country") +
  theme_minimal()

ggplotly(plot,tooltip = c("x"),width=750)
```

Time distribution:
```{r}
COVID_2_Day<- COVID_2 %>% group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed),
                                                        World_deaths=sum(Deaths),
                                                        World_recovered=sum(Recovered))


COVID_Day_confirmed_series<-xts(COVID_2_Day$World_confirmed, order.by=COVID_2_Day$Date2)
COVID_Day_deaths_series<-xts(COVID_2_Day$World_deaths, order.by=COVID_2_Day$Date2)
COVID_Day_recovered_series<-xts(COVID_2_Day$World_recovered, order.by=COVID_2_Day$Date2)

Day_summary<-cbind(COVID_Day_confirmed_series,COVID_Day_deaths_series,COVID_Day_recovered_series)
```

```{r}
dygraph(Day_summary, main = "SARS-COV2-outbreak: Total worldwide cases", 
        xlab="Date", ylab="Total cases",width = 750) %>% 
  dySeries("COVID_Day_confirmed_series", "Total cases",drawPoints = TRUE, 
           pointSize = 3, color=rgb(53/255,116/255,199/255)) %>% 
  dySeries("COVID_Day_deaths_series", "Total deaths",drawPoints = TRUE, 
           pointSize = 3, color=rgb(189/255,55/255,48/255)) %>% 
  dySeries("COVID_Day_recovered_series", "Total recovered",drawPoints = TRUE, 
           pointSize = 3, color=rgb(69/255,136/255,51/255)) %>% 
  dyRangeSelector()
```


```{r}
New_count<-function(x)
{
  Daily_cases<-numeric(length(x))
  
  for(i in length(x):2)
  {
    Daily_cases[i]=x[i] - x[i-1]
  }
  return(Daily_cases)
}

New_cases<-New_count(COVID_2_Day$World_confirmed)
New_deaths<-New_count(COVID_2_Day$World_deaths)
New_recovered<-New_count(COVID_2_Day$World_recovered)
COVID_New_confirmed_series<-xts(New_cases, order.by=COVID_2_Day$Date2)
COVID_New_deaths_series<-xts(New_deaths, order.by=COVID_2_Day$Date2)
COVID_New_recovered_series<-xts(New_recovered, order.by=COVID_2_Day$Date2)

New_summary<-cbind(COVID_New_confirmed_series,COVID_New_deaths_series,COVID_New_recovered_series)
```

```{r}
dygraph(New_summary, main = "SARS-COV2-outbreak: Total worldwide cases", 
        xlab="Date", ylab="Novel coronavirus cases",width = 750) %>% 
  dySeries("COVID_New_confirmed_series", "New cases",drawPoints = TRUE, 
           pointSize = 3, color=rgb(53/255,116/255,199/255)) %>% 
  dySeries("COVID_New_deaths_series", "New deaths",drawPoints = TRUE, 
           pointSize = 3, color=rgb(189/255,55/255,48/255)) %>% 
  dySeries("COVID_New_recovered_series", "New recovered",drawPoints = TRUE, 
           pointSize = 3, color=rgb(69/255,136/255,51/255)) %>% 
  dyRangeSelector()
```


Team members countries total cases:
```{r}
COVID_2_Day_Lebanon<- COVID_2 %>% 
  filter(Country.Region %in% c("Lebanon")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Chile<- COVID_2 %>% 
  filter(Country.Region %in% c("Chile")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Colombia<- COVID_2 %>% 
  filter(Country.Region %in% c("Colombia")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_CostaRica<- COVID_2 %>% 
  filter(Country.Region %in% c("Costa Rica")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))


COVID_Day_series_Lebanon<-xts(COVID_2_Day_Lebanon$World_confirmed, order.by=COVID_2_Day_Lebanon$Date2)
COVID_Day_series_Chile<-xts(COVID_2_Day_Chile$World_confirmed, order.by=COVID_2_Day_Chile$Date2)
COVID_Day_series_Colombia<-xts(COVID_2_Day_Colombia$World_confirmed, order.by=COVID_2_Day_Colombia$Date2)
COVID_Day_series_CostaRica<-xts(COVID_2_Day_CostaRica$World_confirmed, order.by=COVID_2_Day_CostaRica$Date2)

Our_Countries<-cbind(COVID_Day_series_Lebanon,COVID_Day_series_Chile,COVID_Day_series_Colombia,COVID_Day_series_CostaRica)

```

```{r}
dygraph(Our_Countries, main = "SARS-COV2-outbreak: Total cases by country", xlab="Date", ylab="Total cases",width = 750) %>% 
  dySeries("COVID_Day_series_Lebanon", "Lebanon",drawPoints = TRUE, 
           pointSize = 3, color=rgb(0,0,3/255)) %>% 
  dySeries("COVID_Day_series_Chile", "Chile",drawPoints = TRUE, 
           pointSize = 3,color=rgb(120/255,28/255,109/255)) %>% 
  dySeries("COVID_Day_series_Colombia", "Colombia",drawPoints = TRUE, 
           pointSize = 3,color=rgb(237/255,105/255,37/255)) %>% 
  dySeries("COVID_Day_series_CostaRica", "Costa Rica",drawPoints = TRUE,
           pointSize = 3,color=rgb(204/255,197/255,126/255)) %>% 
  dyRangeSelector()
```

```{r}
New_Lebanon<-New_count(COVID_2_Day_Lebanon$World_confirmed)
New_Chile<-New_count(COVID_2_Day_Chile$World_confirmed)
New_Colombia<-New_count(COVID_2_Day_Colombia$World_confirmed)
New_CostaRica<-New_count(COVID_2_Day_CostaRica$World_confirmed)

COVID_New_series_Lebanon<-xts(New_Lebanon, order.by=COVID_2_Day_Lebanon$Date2)
COVID_New_series_Chile<-xts(New_Chile, order.by=COVID_2_Day_Chile$Date2)
COVID_New_series_Colombia<-xts(New_Colombia, order.by=COVID_2_Day_Colombia$Date2)
COVID_New_series_CostaRica<-xts(New_CostaRica, order.by=COVID_2_Day_CostaRica$Date2)

Our_New_Countries<-cbind(COVID_New_series_Lebanon,COVID_New_series_Chile,COVID_New_series_Colombia,COVID_New_series_CostaRica)
```

```{r}
dygraph(Our_New_Countries, main = "SARS-COV2-outbreak: New cases by country", xlab="Date", ylab="Total cases",width = 750) %>% 
  dySeries("COVID_New_series_Lebanon", "Lebanon",drawPoints = TRUE, 
           pointSize = 3, color=rgb(0,0,3/255)) %>% 
  dySeries("COVID_New_series_Chile", "Chile",drawPoints = TRUE, 
           pointSize = 3,color=rgb(120/255,28/255,109/255)) %>% 
  dySeries("COVID_New_series_Colombia", "Colombia",drawPoints = TRUE, 
           pointSize = 3,color=rgb(237/255,105/255,37/255)) %>% 
  dySeries("COVID_New_series_CostaRica", "Costa Rica",drawPoints = TRUE,
           pointSize = 3,color=rgb(204/255,197/255,126/255)) %>% 
  dyRangeSelector()
```

# Looking for correlations

```{r}
fig <- plot_ly(COVID_updated, x = ~Confirmed, y = ~Deaths, z = ~Recovered, width=750) %>% 
  add_markers(text= ~Country.Region ,hoverinfo= "text",
              marker = list(color=rgb(189/255,55/255,48/255))) %>% 
  layout(title="Confirmed cases Vs. Deaths Vs. Recovered", scene = list(
                    xaxis = list(title = 'Confirmed'),
                     yaxis = list(title = 'Deaths'),
                     zaxis = list(title = 'Recovered'))) 
fig
```
## For the number of cases

### Human Development Index

```{r}
HDI<-read.csv("Human Development Index (HDI)_2.csv",sep=";",dec=",")
```

```{r}
COVID_Country<-COVID_2 %>% filter(Date2==max(Date2)) %>% 
  group_by(Country.Region) %>% summarise(Total_confirmed=sum(Confirmed),
                                         Total_deaths=sum(Deaths),
                                         Total_Recovered=sum(Recovered))
```

Remove after parentheses:
```{r}
HDI$Country_2<-gsub("\\s*\\([^\\)]+\\)","",as.character(HDI$Country))
```

```{r}
HDI$Country_2[HDI$Country_2=="United States"]<-"US"
HDI$Country_2[HDI$Country_2=="Korea"]<-"South Korea"
```

Population:
```{r}
Population<-read.csv("World_population.csv",sep=";",dec=",")
```

Remove after commma:
```{r}
Population$Country_Name_2<-gsub(",.*", "", as.character(Population$Country_Name))
```

```{r}
Population$Country_Name_2[Population$Country_Name_2=="United States"]<-"US"
Population$Country_Name_2[Population$Country_Code=="KOR"]<-"South Korea"
Population$Country_Name_2[Population$Country_Code=="CZE"]<-"Czechia"
```

Natural Join:
```{r,warning=FALSE,message=FALSE}
COVID_3<- COVID_Country %>% inner_join(HDI,by=c("Country.Region"="Country_2")) %>% 
  inner_join(Population,by=c("Country.Region"="Country_Name_2")) %>% 
  select(Country.Region,Total_confirmed,Total_deaths,Total_Recovered,HDI_Rank_2018,Year_2018,
         Country_Code,Population_2018) %>% 
  mutate(Cases_million=(Total_confirmed/Population_2018)*1000000,
         Recovered_percentage=(Total_Recovered/Total_confirmed)*100)  

COVID_3<-COVID_3[!is.na(COVID_3$Population_2018),]

```

Countries with top 10 cases per million inhabitants
```{r}
COVID3_top<-COVID_3 %>% top_n(10,Cases_million) %>% arrange(desc(Cases_million)) %>% 
  mutate(Cases_million=round(Cases_million,0))
```

```{r}
plot<-ggplot(data=COVID3_top
       , aes(x=Cases_million,y=reorder(Country.Region,Cases_million))) +
  geom_bar(stat ="identity",alpha=0.8,fill="sky blue", ) +
  geom_text(aes(label=Cases_million), vjust=0.5, hjust=0.9,color="black", size=3.5) +
  scale_x_continuous(labels = comma) +
  labs(title = paste("Top 10 countries with confirmed cases per million habitant \n as of ",max(COVID_2$Date2)),
       x = "Confirmed cases per million habitants",
       y = "Country") +
  theme_minimal()

ggplotly(plot,tooltip = c("x"),width=750)
```


Plot the Human Development Index(HDI) Vs. the number of cases (applying a log transformation), and the proportion of recovered cases:
```{r}
plot<-ggplot(data=COVID_3,aes(x=log(Cases_million),y=Year_2018,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(237/255,105/255,37/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="HDI Vs. logarithmus of COVID-19 cases by million inhabitants \n and proportion of recovered",
       x="ln(Cases/1M population)",
       y="HDI")

ggplotly(plot,tooltip = c("text"),width=750)
```


```{r}
COVID_numeric_1<-COVID_3 %>% mutate(Log_cases=log(Cases_million),
                                    Death_percentage=(Total_deaths/Total_confirmed)*100) %>% 
  select(Log_cases,Recovered_percentage,Death_percentage,Year_2018)

corrplot(cor(COVID_numeric_1),method = "number",tl.col="black",tl.srt=15,
         col=colorRampPalette(c(rgb(204/255,197/255,126/255),rgb(237/255,105/255,37/255)))(200))
```

```{r}
Measles<-read.csv("Measles_immunization.csv",sep=";",dec=".")
```

```{r}
Measles$Country_2<-gsub("\\s*\\([^\\)]+\\)","",as.character(Measles$Country))
```

```{r}
Measles$Country_2[Measles$Country_2=="United States"]<-"US"
Measles$Country_2[Measles$Country_2=="Korea"]<-"South Korea"
```

```{r}
COVID_3<- COVID_3 %>% inner_join(Measles,by=c("Country.Region"="Country_2")) %>% select(-c("Country"))
```


### Health expenditure (% of GDP)

```{r}
Health_expenditure<-read.csv("Health_expenditure_GDP.csv",sep=";",dec=".")
```

```{r}
Health_expenditure$Country_2<-gsub("\\s*\\([^\\)]+\\)","",
                                   as.character(Health_expenditure$Country))
```

```{r}
Health_expenditure$Country_2[Health_expenditure$Country_2=="United States"]<-"US"
Health_expenditure$Country_2[Health_expenditure$Country_2=="Korea"]<-"South Korea"
```


```{r}
COVID_3<- COVID_3 %>% inner_join(Health_expenditure,by=c("Country.Region"="Country_2")) %>% select(-c("Country"))
```

```{r}
plot<-ggplot(data=COVID_3,aes(x=log(Cases_million),y=Expenditure_2016,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(204/255,197/255,126/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="Health expenditure (% of GDP) Vs. logarithmus of COVID-19 cases by million inhabitants \n and proportion of recovered",
       x="ln(Cases/1M population)",
       y="% of GDP in health")

ggplotly(plot,tooltip = c("text"),width=750)
```

### Temperature

```{r}
Temperature<-read.csv("GlobalLandTemperaturesByCountry.csv",sep=";")
```

```{r}
Date_Temp<-as.Date(Temperature$dt, format="%d/%m/%Y") 

Temperature$Date_Temp<-Date_Temp
```

Extract month, filter IQ (Jan, Feb and Mar) and obtain the average IQ temperature:
```{r}
Temperature<- Temperature %>% mutate(Month=month(Temperature$Date_Temp,label =TRUE),
                                     Year=year(Temperature$Date_Temp)) %>% 
  filter(Month %in% c("Jan","Feb","Mar") & Year>=2000)  %>% 
  group_by(Country) %>% summarise(Avg_IQ_temperature=mean(AverageTemperature))

Temperature<-Temperature[!is.na(Temperature$Avg_IQ_temperature),]
```

```{r}
Temperature$Country_2<-gsub("\\s*\\([^\\)]+\\)","",
                                   as.character(Temperature$Country))
```

```{r}
Temperature$Country_2[Temperature$Country_2=="United States"]<-"US"
Temperature$Country_2[Temperature$Country_2=="Korea"]<-"South Korea"
```

```{r}
COVID_3<- COVID_3 %>% inner_join(Temperature,by=c("Country.Region"="Country_2")) %>% select(-c("Country"))

#write.csv(COVID_3,"COVID_Covariables.csv")
```

```{r}
plot<-ggplot(data=COVID_3,aes(x=log(Cases_million),y=Avg_IQ_temperature,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(120/255,28/255,109/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="Average IQ temperature Vs. logarithmus of COVID-19 cases by million inhabitants \n and proportion of recovered",
       x="ln(Cases/1M population)",
       y="Temperature (Celcius)")

ggplotly(plot,tooltip = c("text"),width=750)
```

## For the number of deaths

###DTP immunization

```{r}
DTP_immunization<-read.csv("Infants_lacking_immunization_DTP.csv",sep=";")
```

Remove after parentheses:
```{r}
DTP_immunization$Country_2<-gsub("\\s*\\([^\\)]+\\)","",
                                 as.character(DTP_immunization$Country))
```


```{r}
DTP_immunization$Country_2[DTP_immunization$Country_2=="United States"]<-"US"
DTP_immunization$Country_2[DTP_immunization$Country_2=="Korea"]<-"South Korea"
```


```{r,warning=FALSE,message=FALSE}
COVID_4<- COVID_Country %>% inner_join(DTP_immunization,
                                       by=c("Country.Region"="Country_2")) %>% 
  select(Country.Region,Total_confirmed,Total_deaths,Total_Recovered,
         Lack_DTP_inmmunization_2018) %>% 
  mutate(Recovered_percentage=(Total_Recovered/Total_confirmed)*100,
         Death_rate=(Total_deaths/Total_confirmed)*100)
```

```{r}
plot<-ggplot(data=COVID_4,aes(x=Death_rate,y=Lack_DTP_inmmunization_2018,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(120/255,28/255,109/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="% infants lacking DTP immunization Vs. death rate and \n proportion of recovered",
       x="Novel coronavirus death rate",
       y="% of infants")

ggplotly(plot,tooltip = c("text"),width=750)
```

### Infants lacking immunization, measles (% of one-year-olds)

```{r}
COVID_4<- COVID_4 %>% inner_join(Measles,by=c("Country.Region"="Country_2")) %>% select(-c("Country"))
```


```{r}
ggplot(COVID_4, aes(y=Measles_2018)) + 
  geom_boxplot(fill="dodgerblue4",outlier.shape = 21, 
               outlier.fill = "firebrick",alpha=0.75) +
  ggtitle("Boxplot of % infants lacking measles immunization") + ylab("% of infants") +
  theme_minimal()
```

```{r}
ggplot(COVID_4, aes(Measles_2018)) + 
  geom_histogram(fill="dodgerblue4",bins=20,alpha=0.8) +
  ggtitle("Histogram of % infants lacking measles immunization") + 
  xlab("% of infants") + 
  ylab("Count") +
  theme_minimal()
```

```{r}
plot<-ggplot(data=COVID_4,aes(x=Death_rate,y=Measles_2018,
                        size=Recovered_percentage,text=Country.Region)) +
  geom_point(color="black",fill=rgb(237/255,105/255,37/255),shape=21,alpha=0.6) +
  scale_size(range = c(3,15), name="Recovered \n percentage") +
  theme_minimal() + 
  theme(legend.position="bottom") +
  labs(title="% infants lacking measles immunization Vs. fatality rate \n and proportion of recovered",
       x="Fatality rate (%)",
       y="% of infants")

ggplotly(plot,tooltip = c("text"),width=750)
```


# Fitting a regression model

Transforming with ln and converting temperature as kelvin:
```{r}
Mod1<-lm(log(COVID_3$Cases_million)~log(COVID_3$Year_2018)+log(COVID_3$Measles_2018)+
           log(COVID_3$Expenditure_2016)+log(COVID_3$Avg_IQ_temperature+273.15))
summary(Mod1)
```

Stepwise with AIC critertion:
```{r}
Mod2<-step(Mod1,direction = "both")
```

```{r}
summary(Mod2)
```

Normality of residuals:
```{r}
hist(Mod2$residuals)
shapiro.test(Mod2$residuals)
```

Prediction power: separate between training and testing:
```{r}
set.seed(179819)
Sample <- sample(1:length(COVID_3$Cases_million),length(COVID_3$Cases_million)*0.20)
t.testing<- COVID_3[Sample,]
t.training<- COVID_3[-Sample,]
```

Transform the training and testing variables as before:
```{r}
t.training<-t.training %>% mutate(Cases_million_log=log(Cases_million),HDI_log=log(Year_2018),
                      GDP_log=log(Expenditure_2016),
                      Temperature_log_kelvin=log(Avg_IQ_temperature+273.15)) 

t.training<-t.training[,14:17]

t.testing<-t.testing %>% mutate(Cases_million_log=log(Cases_million),HDI_log=log(Year_2018),
                      GDP_log=log(Expenditure_2016),
                      Temperature_log_kelvin=log(Avg_IQ_temperature+273.15)) 

t.testing<-t.testing[,14:17]
```

Fit the same model with training
```{r}
Mod3<-lm(Cases_million_log~., data=t.training)
summary(Mod3)
```

Error functions:
```{r}
# Residual Sum of Square (RSS)
RSS<-function(Pred,Actual) {
  ss<-sum((Actual-Pred)^2)
  return(ss)
}

# Residual Standard Error (RSE)
RSE<-function(Pred,Real,NumPred) {
  N<-length(Real)-NumPred-1  
  ss<-sqrt((1/N)*RSS(Pred,Real))
  return(ss)
}
# Mean Squared Error 
MSE <- function(Pred,Actual) {
  N<-length(Actual)
  ss<-(1/N)*RSS(Pred,Actual)
  return(ss)
}

# Relative error
RelativeError<-function(Pred,Actual) {
  ss<-sum(abs(Actual-Pred))/sum(abs(Actual))
  return(ss)
}
```

Prediction:
```{r}
Pred<-predict(Mod3,t.testing)
```

Errors:
```{r}
RSS_Mod3<-RSS(Pred,t.testing$Cases_million_log)
RSE_Mod3<-RSE(Pred,t.testing$Cases_million_log,dim(t.testing)[2]-1)
MSE_Mod3<-MSE(Pred,t.testing$Cases_million_log)
RelativeError_Mod3<-RelativeError(Pred,t.testing$Cases_million_log)

Mod3_Errors<-c(RSS_Mod3,RSE_Mod3,MSE_Mod3,RelativeError_Mod3)
```

Now, a model without temperature:
```{r}
t.training <- t.training %>% select(-Temperature_log_kelvin)
t.testing <- t.testing %>% select(-Temperature_log_kelvin)
```

```{r}
Mod4<-lm(Cases_million_log~., data=t.training)
summary(Mod4)
```

Prediction:
```{r}
Pred<-predict(Mod4,t.testing)
```

Errors:
```{r}
RSS_Mod4<-RSS(Pred,t.testing$Cases_million_log)
RSE_Mod4<-RSE(Pred,t.testing$Cases_million_log,dim(t.testing)[2]-1)
MSE_Mod4<-MSE(Pred,t.testing$Cases_million_log)
RelativeError_Mod4<-RelativeError(Pred,t.testing$Cases_million_log)

Mod4_Errors<-c(RSS_Mod4,RSE_Mod4,MSE_Mod4,RelativeError_Mod4)
```

Create a radarplot:
```{r}
Errors<-rbind(Mod3_Errors,Mod4_Errors)

rownames(Errors)<-c("Model with temperature","Model without temperature")

colnames(Errors)<-c("Residual Sum of Square","Residual Standard Error","Mean Squared Error","Relative error")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
```

```{r}
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("With temperature","Without temperature"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))
```

# Forecast by country

## Republic of Costa Rica

![](CostaRicaFlag.png){width=40%}

*Model with temperature*

```{r}
CostaRica_Temp<-read.csv("CostaRica_Temperature.csv",sep=";")
CostaRica_Temp$Date<-as.Date(CostaRica_Temp$Date,format="%d/%m/%Y")
```

```{r}
COVID_2_Day_CostaRica_Temp<-COVID_2_Day_CostaRica %>%
  inner_join(CostaRica_Temp,by=c("Date2"="Date"))

COVID_Day_series_CostaRica_Temp<-xts(COVID_2_Day_CostaRica_Temp$World_confirmed, order.by=COVID_2_Day_CostaRica_Temp$Date2)
```

```{r}
COVID_series_CostaRica_Temp_Train<-COVID_Day_series_CostaRica_Temp[1:64] #Until March 25th
COVID_series_CostaRica_Temp_Test<-COVID_Day_series_CostaRica_Temp[65:length(COVID_Day_series_CostaRica_Temp)] #From March 26th onwards
```

Get the lagged difference:
```{r}
plot(diff(COVID_series_CostaRica_Temp_Train),type="l",main="1st ") 
```

Correlograms of first diference:
```{r}
tsdisplay(diff(COVID_series_CostaRica_Temp_Train))
```

Arima model: ARIMA(1,2,1)
```{r}
#Auto Arima for Costa Rica:

ARIMA1_CostaRica<-auto.arima(COVID_series_CostaRica_Temp_Train,
                             xreg = COVID_2_Day_CostaRica_Temp$Temperature_CostaRica[1:64])
summary(ARIMA1_CostaRica)
```

Predictive error functions:
```{r}
#Relative error
RE <- function(Fore,Actual) {
  return(sum(abs(Fore-Actual))/abs(sum(Actual)))
}


#MAPE
MAPE<-function(Fore,Actual){
  return(
    mean(abs(Actual-Fore)/abs(Actual))*100
    )
}

# mean squared error (MSE)
MSE<-function(Fore,Actual) {
  N<-length(Actual)
  ss<-sum((Actual-Fore)^2)
  return((1/N)*ss)
}

#PFA
PFA <- function(Fore,Actual) {
  Total<-0
  N<-length(Fore)
  for(i in 1:N) {
    if(Fore[i]>=Actual[i])
      Total<-Total+1      
  }
  return(Total/N)
}
```

Forecast prediction to compare
```{r}
Test_CostaRica_Temp<-COVID_2_Day_CostaRica_Temp$Temperature_CostaRica[65:length(COVID_Day_series_CostaRica_Temp)]


P_CRI_1<-forecast(ARIMA1_CostaRica,xreg = Test_CostaRica_Temp)
```

Calculate errors:
```{r}
RE_CRI_1<-RE(P_CRI_1$mean,as.vector(COVID_series_CostaRica_Temp_Test))
MAPE_CRI_1<-MAPE(P_CRI_1$mean,as.vector(COVID_series_CostaRica_Temp_Test))
MSE_CRI_1<-MSE(P_CRI_1$mean,as.vector(COVID_series_CostaRica_Temp_Test))
PFA_CRI_1<-PFA(P_CRI_1$mean,as.vector(COVID_series_CostaRica_Temp_Test))

Errors.CRI_1<-c(RE_CRI_1,MAPE_CRI_1,MSE_CRI_1,PFA_CRI_1)
Errors.CRI_1
```

Manual MAPE:
```{r}
dd<-data.frame(Actual=as.vector(COVID_series_CostaRica_Temp_Test),
               Fore=P_CRI_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
```

*Model without temperature*

ARIMA(1,2,0)
```{r}
#Auto Arima for Costa Rica:

ARIMA2_CostaRica<-auto.arima(COVID_series_CostaRica_Temp_Train)
summary(ARIMA2_CostaRica)
```

Forecast prediction to compare
```{r}
P_CRI_2<-forecast(ARIMA2_CostaRica,h=length(COVID_series_CostaRica_Temp_Test))
```

Calculate errors:
```{r}
RE_CRI_2<-RE(P_CRI_2$mean,as.vector(COVID_series_CostaRica_Temp_Test))
MAPE_CRI_2<-MAPE(P_CRI_2$mean,as.vector(COVID_series_CostaRica_Temp_Test))
MSE_CRI_2<-MSE(P_CRI_2$mean,as.vector(COVID_series_CostaRica_Temp_Test))
PFA_CRI_2<-PFA(P_CRI_2$mean,as.vector(COVID_series_CostaRica_Temp_Test))

Errors.CRI_2<-c(RE_CRI_2,MAPE_CRI_2,MSE_CRI_2,PFA_CRI_2)
Errors.CRI_2
```

```{r}
Errors<-rbind(Errors.CRI_1,Errors.CRI_2)

rownames(Errors)<-c("ARIMAX(1,2,1)","ARIMA(1,2,0)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
```

```{r}
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(1,2,1)","ARIMA(1,2,0)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))
```

Conclusion: Keep model with temperature

Make model with the overall series
```{r}
#Auto Arima for Costa Rica:

ARIMA_Final_CostaRica<-auto.arima(COVID_Day_series_CostaRica_Temp,
                             xreg = COVID_2_Day_CostaRica_Temp$Temperature_CostaRica)
summary(ARIMA_Final_CostaRica)
```

Final forecast:
```{r}
Future_CostaRica_Temp<-CostaRica_Temp$Temperature_CostaRica[CostaRica_Temp$Date>max(COVID_2$Date2)]

P_CRI_Final<-forecast(ARIMA_Final_CostaRica,xreg = Future_CostaRica_Temp)
Low_lim_CRI<-data.frame(P_CRI_Final$lower)[,2]
Upp_lim_CRI<-data.frame(P_CRI_Final$upper)[,2]
```

For making the plot:
```{r}
##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_CostaRica_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_CostaRica_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_CostaRica_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_CRI <- xts(Low_lim_CRI,order.by=per_2)
Forecast_CRI <- xts(P_CRI_Final$mean,order.by=per_2)
Upp_lim_CRI <- xts(Upp_lim_CRI,order.by=per_2)
predNA <- rep(NA, length(Forecast_CRI))
B <- cbind(predNA, Low_lim_CRI, Forecast_CRI, Upp_lim_CRI)

all_series_CRI <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_CRI) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
```

```{r}
dygraph(all_series_CRI, main="SARS-COV2-outbreak: Total Costa Rica cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(189/255,44/255,47/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(10/255,44/255,119/255)) %>% 
  dyRangeSelector()

```

## Mexico

![](MexicoFlag.png){width=40%}

*Model with temperature*

```{r}
Mexico_Temp<-read.csv("Mexico_Temperature.csv",sep=";")
Mexico_Temp$Date<-as.Date(Mexico_Temp$Date,format="%d/%m/%Y")
```

```{r}
COVID_2_Day_Mexico<- COVID_2 %>% 
  filter(Country.Region %in% c("Mexico")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Mexico_Temp<-COVID_2_Day_Mexico %>%
  inner_join(Mexico_Temp,by=c("Date2"="Date"))

COVID_Day_series_Mexico_Temp<-xts(COVID_2_Day_Mexico_Temp$World_confirmed, order.by=COVID_2_Day_Mexico_Temp$Date2)
```

```{r}
COVID_series_Mexico_Train<-COVID_Day_series_Mexico_Temp[1:74] #Until April 4th
COVID_series_Mexico_Test<-COVID_Day_series_Mexico_Temp[75:length(COVID_Day_series_Mexico_Temp)] #From April 5th onwards
```

Get the lagged difference:
```{r}
plot(diff(COVID_series_Mexico_Train),type="l",main="1st ") 
```

Correlograms of first diference:
```{r}
tsdisplay(diff(COVID_series_Mexico_Train))
```

Arima model: ARIMA(2,2,3)
```{r}
#Auto Arima for Mexico:

#ARIMA1_Mexico<-auto.arima(COVID_series_Mexico_Train,
                             #xreg = COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Mexico)
```

Forecast prediction to compare
```{r}
Test_Mexico_Temp<-COVID_2_Day_Mexico_Temp$Temperature_Mexico[75:length(COVID_Day_series_Mexico_Temp)]


P_MEX_1<-forecast(ARIMA1_Mexico,xreg = Test_Mexico_Temp)
```

Calculate errors:
```{r}
RE_MEX_1<-RE(P_MEX_1$mean,as.vector(COVID_series_Mexico_Test))
MAPE_MEX_1<-MAPE(P_MEX_1$mean,as.vector(COVID_series_Mexico_Test))
MSE_MEX_1<-MSE(P_MEX_1$mean,as.vector(COVID_series_Mexico_Test))
PFA_MEX_1<-PFA(P_MEX_1$mean,as.vector(COVID_series_Mexico_Test))

Errors.MEX_1<-c(RE_MEX_1,MAPE_MEX_1,MSE_MEX_1,PFA_MEX_1)
Errors.MEX_1
```

Manual MAPE:
```{r}
dd<-data.frame(Actual=as.vector(COVID_series_Mexico_Test),
               Fore=P_MEX_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
```

*Model without temperature*

ARIMA(2,2,4)
```{r}
#Auto Arima for Mexico:

ARIMA2_Mexico<-auto.arima(COVID_series_Mexico_Train)
summary(ARIMA2_Mexico)
```

Forecast prediction to compare
```{r}
P_MEX_2<-forecast(ARIMA2_Mexico,h=length(COVID_series_Mexico_Test))
```

Calculate errors:
```{r}
RE_MEX_2<-RE(P_MEX_2$mean,as.vector(COVID_series_Mexico_Test))
MAPE_MEX_2<-MAPE(P_MEX_2$mean,as.vector(COVID_series_Mexico_Test))
MSE_MEX_2<-MSE(P_MEX_2$mean,as.vector(COVID_series_Mexico_Test))
PFA_MEX_2<-PFA(P_MEX_2$mean,as.vector(COVID_series_Mexico_Test))

Errors.MEX_2<-c(RE_MEX_2,MAPE_MEX_2,MSE_MEX_2,PFA_MEX_2)
Errors.MEX_2
```

```{r}
Errors<-rbind(Errors.MEX_1,Errors.MEX_2)

rownames(Errors)<-c("ARIMAX(2,2,3)","ARIMA(2,2,4)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
```

```{r}
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(2,2,3)","ARIMA(2,2,4)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))
```

Conclusion: Keep model with temperature

Make model with the overall series
```{r}
#Auto Arima for Mexico:

ARIMA_Final_Mexico<-auto.arima(COVID_Day_series_Mexico_Temp,
                             xreg = COVID_2_Day_Mexico_Temp$Temperature_Mexico)
summary(ARIMA_Final_Mexico)
```

Final forecast:
```{r}
Future_Mexico_Temp<-Mexico_Temp$Temperature_Mexico[Mexico_Temp$Date>max(COVID_2$Date2)]

P_MEX_Final<-forecast(ARIMA_Final_Mexico,xreg = Future_Mexico_Temp)
Low_lim_MEX<-data.frame(P_MEX_Final$lower)[,2]
Upp_lim_MEX<-data.frame(P_MEX_Final$upper)[,2]
```

For making the plot:
```{r}
##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Mexico_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Mexico_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Mexico_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_MEX <- xts(Low_lim_MEX,order.by=per_2)
Forecast_MEX <- xts(P_MEX_Final$mean,order.by=per_2)
Upp_lim_MEX <- xts(Upp_lim_MEX,order.by=per_2)
predNA <- rep(NA, length(Forecast_MEX))
B <- cbind(predNA, Low_lim_MEX, Forecast_MEX, Upp_lim_MEX)

all_series_MEX <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_MEX) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
```

```{r}
dygraph(all_series_MEX, main="SARS-COV2-outbreak: Total Mexico cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(189/255,44/255,47/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(43/255,102/255,73/255)) %>% 
  dyRangeSelector()

```

## Italy

![](ItalyFlag.png){width=40%}

*Model with temperature*

```{r}
Italy_Temp<-read.csv("Italy_Temperature.csv",sep=";")
Italy_Temp$Date<-as.Date(Italy_Temp$Date,format="%d/%m/%Y")
```

```{r}
COVID_2_Day_Italy<- COVID_2 %>% 
  filter(Country.Region %in% c("Italy")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Italy_Temp<-COVID_2_Day_Italy %>%
  inner_join(Italy_Temp,by=c("Date2"="Date"))

COVID_Day_series_Italy_Temp<-xts(COVID_2_Day_Italy_Temp$World_confirmed, order.by=COVID_2_Day_Italy_Temp$Date2)
```

```{r}
COVID_Day_series_Italy_Train<-COVID_Day_series_Italy_Temp[1:64] #Until March 25th
COVID_Day_series_Italy_Test<-COVID_Day_series_Italy_Temp[65:length(COVID_Day_series_Italy_Temp)] #From March 26th onwards
```

Get the lagged difference:
```{r}
plot(diff(COVID_Day_series_Italy_Train),type="l",main="1st ") 
```

Correlograms of first diference:
```{r}
tsdisplay(diff(COVID_Day_series_Italy_Train))
```

Arima model: ARIMA(1,2,0)
```{r}
#Auto Arima for Italy:

ARIMA1_Italy<-auto.arima(COVID_Day_series_Italy_Train,
                             xreg = COVID_2_Day_Italy_Temp$Temperature_Italy[1:64])
#ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Italy)
```

Forecast prediction to compare
```{r}
Test_Italy_Temp<-COVID_2_Day_Italy_Temp$Temperature_Italy[65:length(COVID_Day_series_Italy_Temp)]


P_ITA_1<-forecast(ARIMA1_Italy,xreg = Test_Italy_Temp)
```

Calculate errors:
```{r}
RE_ITA_1<-RE(P_ITA_1$mean,as.vector(COVID_Day_series_Italy_Test))
MAPE_ITA_1<-MAPE(P_ITA_1$mean,as.vector(COVID_Day_series_Italy_Test))
MSE_ITA_1<-MSE(P_ITA_1$mean,as.vector(COVID_Day_series_Italy_Test))
PFA_ITA_1<-PFA(P_ITA_1$mean,as.vector(COVID_Day_series_Italy_Test))

Errors.ITA_1<-c(RE_ITA_1,MAPE_ITA_1,MSE_ITA_1,PFA_ITA_1)
Errors.ITA_1
```

Manual MAPE:
```{r}
dd<-data.frame(Actual=as.vector(COVID_Day_series_Italy_Test),
               Fore=P_ITA_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
```

*Model without temperature*

ARIMA(1,2,0)
```{r}
#Auto Arima for Italy:
ARIMA2_Italy<-auto.arima(COVID_Day_series_Italy_Train)
summary(ARIMA2_Italy)
```

Forecast prediction to compare
```{r}
P_ITA_2<-forecast(ARIMA2_Italy,h=length(COVID_Day_series_Italy_Test))
```

Calculate errors:
```{r}
RE_ITA_2<-RE(P_ITA_2$mean,as.vector(COVID_Day_series_Italy_Test))
MAPE_ITA_2<-MAPE(P_ITA_2$mean,as.vector(COVID_Day_series_Italy_Test))
MSE_ITA_2<-MSE(P_ITA_2$mean,as.vector(COVID_Day_series_Italy_Test))
PFA_ITA_2<-PFA(P_ITA_2$mean,as.vector(COVID_Day_series_Italy_Test))

Errors.ITA_2<-c(RE_ITA_2,MAPE_ITA_2,MSE_ITA_2,PFA_ITA_2)
Errors.ITA_2
```

```{r}
Errors<-rbind(Errors.ITA_1,Errors.ITA_2)

rownames(Errors)<-c("ARIMAX(1,2,0)","ARIMA(1,2,0)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
```

```{r}
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(1,2,0)","ARIMA(1,2,0)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))
```

Conclusion: Keep model without temperature

Make model with the overall series
```{r}
#Auto Arima for Italy:

ARIMA_Final_Italy<-auto.arima(COVID_Day_series_Italy_Temp)
summary(ARIMA_Final_Italy)
```

Final forecast:
```{r}
Future_Italy_Temp<-Italy_Temp$Temperature_Italy[Italy_Temp$Date>max(COVID_2$Date2)]

P_ITA_Final<-forecast(ARIMA_Final_Italy,h=length(Future_Italy_Temp))
Low_lim_ITA<-data.frame(P_ITA_Final$lower)[,2]
Upp_lim_ITA<-data.frame(P_ITA_Final$upper)[,2]
```

For making the plot:
```{r}
##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Italy_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Italy_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Italy_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_ITA <- xts(Low_lim_ITA,order.by=per_2)
Forecast_ITA <- xts(P_ITA_Final$mean,order.by=per_2)
Upp_lim_ITA <- xts(Upp_lim_ITA,order.by=per_2)
predNA <- rep(NA, length(Forecast_ITA))
B <- cbind(predNA, Low_lim_ITA, Forecast_ITA, Upp_lim_ITA)

all_series_ITA <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_ITA) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
```

```{r}
dygraph(all_series_ITA, main="SARS-COV2-outbreak: Total Italy cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(190/255,59/255,61/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(64/255,143/255,78/255)) %>% 
  dyRangeSelector()

```


## Lebanon

![](LebanonFlag.png){width=40%}

*Model with temperature*

```{r}
Lebanon_Temp<-read.csv("Lebanon_Temperature.csv",sep=";")
Lebanon_Temp$Date<-as.Date(Lebanon_Temp$Date,format="%d/%m/%Y")
```

```{r}
COVID_2_Day_Lebanon<- COVID_2 %>% 
  filter(Country.Region %in% c("Lebanon")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Lebanon_Temp<-COVID_2_Day_Lebanon %>%
  inner_join(Lebanon_Temp,by=c("Date2"="Date"))

COVID_Day_series_Lebanon_Temp<-xts(COVID_2_Day_Lebanon_Temp$World_confirmed, order.by=COVID_2_Day_Lebanon_Temp$Date2)
```

```{r}
COVID_series_Lebanon_Train<-COVID_Day_series_Lebanon_Temp[1:74] #Until April 4th
COVID_series_Lebanon_Test<-COVID_Day_series_Lebanon_Temp[75:length(COVID_Day_series_Lebanon_Temp)] #From April 4th onwards
```

Get the lagged difference:
```{r}
plot(diff(COVID_series_Lebanon_Train),type="l",main="1st ") 
```

Correlograms of first diference:
```{r}
tsdisplay(diff(COVID_series_Lebanon_Train))
```

Arima model: ARIMA(1,1,2)
```{r}
#Auto Arima for Lebanon:

ARIMA1_Lebanon<-auto.arima(COVID_series_Lebanon_Train,
                             xreg = COVID_2_Day_Lebanon_Temp$Temperature_Lebanon[1:74])
#ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Lebanon)
```

Forecast prediction to compare
```{r}
Test_Lebanon_Temp<-COVID_2_Day_Lebanon_Temp$Temperature_Lebanon[75:length(COVID_Day_series_Lebanon_Temp)]


P_LBN_1<-forecast(ARIMA1_Lebanon,xreg = Test_Lebanon_Temp)
```

Calculate errors:
```{r}
RE_LBN_1<-RE(P_LBN_1$mean,as.vector(COVID_series_Lebanon_Test))
MAPE_LBN_1<-MAPE(P_LBN_1$mean,as.vector(COVID_series_Lebanon_Test))
MSE_LBN_1<-MSE(P_LBN_1$mean,as.vector(COVID_series_Lebanon_Test))
PFA_LBN_1<-PFA(P_LBN_1$mean,as.vector(COVID_series_Lebanon_Test))

Errors.LBN_1<-c(RE_LBN_1,MAPE_LBN_1,MSE_LBN_1,PFA_LBN_1)
Errors.LBN_1
```

Manual MAPE:
```{r}
dd<-data.frame(Actual=as.vector(COVID_series_Lebanon_Test),
               Fore=P_LBN_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
```

*Model without temperature*

ARIMA(0,2,2)
```{r}
#Auto Arima for Lebanon:
ARIMA2_Lebanon<-auto.arima(COVID_series_Lebanon_Train)
summary(ARIMA2_Lebanon)
```

Forecast prediction to compare
```{r}
P_LBN_2<-forecast(ARIMA2_Lebanon,h=length(COVID_series_Lebanon_Test))
```

Calculate errors:
```{r}
RE_LBN_2<-RE(P_LBN_2$mean,as.vector(COVID_series_Lebanon_Test))
MAPE_LBN_2<-MAPE(P_LBN_2$mean,as.vector(COVID_series_Lebanon_Test))
MSE_LBN_2<-MSE(P_LBN_2$mean,as.vector(COVID_series_Lebanon_Test))
PFA_LBN_2<-PFA(P_LBN_2$mean,as.vector(COVID_series_Lebanon_Test))

Errors.LBN_2<-c(RE_LBN_2,MAPE_LBN_2,MSE_LBN_2,PFA_LBN_2)
Errors.LBN_2
```

```{r}
Errors<-rbind(Errors.LBN_1,Errors.LBN_2)

rownames(Errors)<-c("ARIMAX(1,1,2)","ARIMA(0,2,2)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
```

```{r}
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(1,1,2)","ARIMA(0,2,2)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))
```

Conclusion: Keep model without temperature

Make model with the overall series
```{r}
#Auto Arima for Lebanon:

ARIMA_Final_Lebanon<-auto.arima(COVID_Day_series_Lebanon_Temp)
summary(ARIMA_Final_Lebanon)
```

Final forecast:
```{r}
Future_Lebanon_Temp<-Lebanon_Temp$Temperature_Lebanon[Lebanon_Temp$Date>max(COVID_2$Date2)]

P_LBN_Final<-forecast(ARIMA_Final_Lebanon,h=length(Future_Lebanon_Temp))
Low_lim_LBN<-data.frame(P_LBN_Final$lower)[,2]
Upp_lim_LBN<-data.frame(P_LBN_Final$upper)[,2]
```

For making the plot:
```{r}
##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Lebanon_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Lebanon_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Lebanon_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_LBN <- xts(Low_lim_LBN,order.by=per_2)
Forecast_LBN <- xts(P_LBN_Final$mean,order.by=per_2)
Upp_lim_LBN <- xts(Upp_lim_LBN,order.by=per_2)
predNA <- rep(NA, length(Forecast_ITA))
B <- cbind(predNA, Low_lim_LBN, Forecast_LBN, Upp_lim_LBN)

all_series_LBN <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_LBN) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
```

```{r}
dygraph(all_series_LBN, main="SARS-COV2-outbreak: Total Lebanon cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(218/255,55/255,50/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(73/255,163/255,90/255)) %>% 
  dyRangeSelector()

```

## Colombia

![](ColombiaFlag.png){width=40%}

*Model with temperature*

```{r}
Colombia_Temp<-read.csv("Colombia_Temperature.csv",sep=";")
Colombia_Temp$Date<-as.Date(Colombia_Temp$Date,format="%d/%m/%Y")
```

```{r}
COVID_2_Day_Colombia<- COVID_2 %>% 
  filter(Country.Region %in% c("Colombia")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Colombia_Temp<-COVID_2_Day_Colombia %>%
  inner_join(Colombia_Temp,by=c("Date2"="Date"))

COVID_Day_series_Colombia_Temp<-xts(COVID_2_Day_Colombia_Temp$World_confirmed, order.by=COVID_2_Day_Colombia_Temp$Date2)
```

```{r}
COVID_series_Colombia_Train<-COVID_Day_series_Colombia_Temp[1:74] #Until April 4th
COVID_series_Colombia_Test<-COVID_Day_series_Colombia_Temp[75:length(COVID_Day_series_Colombia_Temp)] #From April 5th onwards
```

Get the lagged difference:
```{r}
plot(diff(COVID_series_Colombia_Train),type="l",main="1st ") 
```

Correlograms of first diference:
```{r}
tsdisplay(diff(COVID_series_Colombia_Train))
```

Arima model: ARIMA(2,2,2)
```{r}
#Auto Arima for Colombia:

ARIMA1_Colombia<-auto.arima(COVID_series_Colombia_Train,
                             xreg = COVID_2_Day_Colombia_Temp$Temperature_Colombia[1:74])
#ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Colombia)
```

Forecast prediction to compare
```{r}
Test_Colombia_Temp<-COVID_2_Day_Colombia_Temp$Temperature_Colombia[75:length(COVID_Day_series_Colombia_Temp)]


P_COL_1<-forecast(ARIMA1_Colombia,xreg = Test_Colombia_Temp)
```

Calculate errors:
```{r}
RE_COL_1<-RE(P_COL_1$mean,as.vector(COVID_series_Colombia_Test))
MAPE_COL_1<-MAPE(P_COL_1$mean,as.vector(COVID_series_Colombia_Test))
MSE_COL_1<-MSE(P_COL_1$mean,as.vector(COVID_series_Colombia_Test))
PFA_COL_1<-PFA(P_COL_1$mean,as.vector(COVID_series_Colombia_Test))

Errors.COL_1<-c(RE_COL_1,MAPE_COL_1,PFA_COL_1,MSE_COL_1)
Errors.COL_1
```

Manual MAPE:
```{r}
dd<-data.frame(Actual=as.vector(COVID_series_Colombia_Test),
               Fore=P_COL_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
```

*Model without temperature*

ARIMA(2,2,2)
```{r}
#Auto Arima for Colombia:
ARIMA2_Colombia<-auto.arima(COVID_series_Colombia_Train)
summary(ARIMA2_Colombia)
```

Forecast prediction to compare
```{r}
P_COL_2<-forecast(ARIMA2_Colombia,h=length(COVID_series_Colombia_Test))
```

Calculate errors:
```{r}
RE_COL_2<-RE(P_COL_2$mean,as.vector(COVID_series_Colombia_Test))
MAPE_COL_2<-MAPE(P_COL_2$mean,as.vector(COVID_series_Colombia_Test))
MSE_COL_2<-MSE(P_COL_2$mean,as.vector(COVID_series_Colombia_Test))
PFA_COL_2<-PFA(P_COL_2$mean,as.vector(COVID_series_Colombia_Test))

Errors.COL_2<-c(RE_COL_2,MAPE_COL_2,MSE_COL_2,PFA_COL_2)
Errors.COL_2
```

```{r}
Errors<-rbind(Errors.COL_1,Errors.COL_2)

rownames(Errors)<-c("ARIMAX(2,2,2)","ARIMA(2,2,2)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
```

```{r}
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(2,2,2)","ARIMA(2,2,2)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))
```

Conclusion: Keep model without temperature

Make model with the overall series
```{r}
#Auto Arima for Colombia:

ARIMA_Final_Colombia<-auto.arima(COVID_Day_series_Colombia_Temp)
summary(ARIMA_Final_Colombia)
```

Final forecast:
```{r}
Future_Colombia_Temp<-Colombia_Temp$Temperature_Colombia[Colombia_Temp$Date>max(COVID_2$Date2)]

P_COL_Final<-forecast(ARIMA_Final_Colombia,h=length(Future_Colombia_Temp))
Low_lim_COL<-data.frame(P_COL_Final$lower)[,2]
Upp_lim_COL<-data.frame(P_COL_Final$upper)[,2]
```

For making the plot:
```{r}
##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Colombia_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Colombia_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Colombia_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_COL <- xts(Low_lim_COL,order.by=per_2)
Forecast_COL <- xts(P_COL_Final$mean,order.by=per_2)
Upp_lim_COL <- xts(Upp_lim_COL,order.by=per_2)
predNA <- rep(NA, length(Forecast_COL))
B <- cbind(predNA, Low_lim_COL, Forecast_COL, Upp_lim_COL)

all_series_COL <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_COL) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
```

```{r}
dygraph(all_series_COL, main="SARS-COV2-outbreak: Total Colombia cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(17/255,57/255,141/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(246/255,209/255,75/255)) %>% 
  dyRangeSelector()

```

## Chile

![](ChileFlag.png){width=40%}

*Model with temperature*

```{r}
Chile_Temp<-read.csv("Chile_Temperature.csv",sep=";")
Chile_Temp$Date<-as.Date(Chile_Temp$Date,format="%d/%m/%Y")
```

```{r}
COVID_2_Day_Chile<- COVID_2 %>% 
  filter(Country.Region %in% c("Chile")) %>% 
  group_by(Date2) %>% summarise(World_confirmed=sum(Confirmed))

COVID_2_Day_Chile_Temp<-COVID_2_Day_Chile %>%
  inner_join(Chile_Temp,by=c("Date2"="Date"))

COVID_Day_series_Chile_Temp<-xts(COVID_2_Day_Chile_Temp$World_confirmed, order.by=COVID_2_Day_Chile_Temp$Date2)
```

```{r}
COVID_series_Chile_Train<-COVID_Day_series_Chile_Temp[1:76] #Until April 6th
COVID_series_Chile_Test<-COVID_Day_series_Chile_Temp[77:length(COVID_Day_series_Chile_Temp)] #From April 7th onwards
```

Get the lagged difference:
```{r}
plot(diff(COVID_series_Chile_Train),type="l",main="1st ") 
```

Correlograms of first diference:
```{r}
tsdisplay(diff(COVID_series_Chile_Train))
```

Arima model: ARIMA(1,1,0)
```{r}
#Auto Arima for Chile:

ARIMA1_Chile<-auto.arima(COVID_series_Chile_Train,
                             xreg = COVID_2_Day_Chile_Temp$Temperature_Chile[1:76])
#ARIMA1_Mexico<-Arima(COVID_series_Mexico_Train,order=c(2,2,3),xreg=COVID_2_Day_Mexico_Temp$Temperature_Mexico[1:74])
summary(ARIMA1_Chile)
```

Forecast prediction to compare
```{r}
Test_Chile_Temp<-COVID_2_Day_Chile_Temp$Temperature_Chile[77:length(COVID_Day_series_Chile_Temp)]


P_CHL_1<-forecast(ARIMA1_Chile,xreg = Test_Chile_Temp)
```

Calculate errors:
```{r}
RE_CHL_1<-RE(P_CHL_1$mean,as.vector(COVID_series_Chile_Test))
MAPE_CHL_1<-MAPE(P_CHL_1$mean,as.vector(COVID_series_Chile_Test))
MSE_CHL_1<-MSE(P_CHL_1$mean,as.vector(COVID_series_Chile_Test))
PFA_CHL_1<-PFA(P_CHL_1$mean,as.vector(COVID_series_Chile_Test))

Errors.CHL_1<-c(RE_CHL_1,MAPE_CHL_1,MSE_CHL_1,PFA_CHL_1)
Errors.CHL_1
```

Manual MAPE:
```{r}
dd<-data.frame(Actual=as.vector(COVID_series_Chile_Test),
               Fore=P_CHL_1$mean
)
dd<-dd %>% mutate(Abs_Per_Error=abs(Actual-Fore)/abs(Actual))
mean(dd$Abs_Per_Error)*100
```

*Model without temperature*

ARIMA(1,2,4)
```{r}
#Auto Arima for Colombia:
ARIMA2_Chile<-auto.arima(COVID_series_Chile_Train)
summary(ARIMA2_Chile)
```

Forecast prediction to compare
```{r}
P_CHL_2<-forecast(ARIMA2_Chile,h=length(COVID_series_Chile_Test))
```

Calculate errors:
```{r}
RE_CHL_2<-RE(P_CHL_2$mean,as.vector(COVID_series_Chile_Test))
MAPE_CHL_2<-MAPE(P_CHL_2$mean,as.vector(COVID_series_Chile_Test))
MSE_CHL_2<-MSE(P_CHL_2$mean,as.vector(COVID_series_Chile_Test))
PFA_CHL_2<-PFA(P_CHL_2$mean,as.vector(COVID_series_Chile_Test))

Errors.CHL_2<-c(RE_CHL_2,MAPE_CHL_2,MSE_CHL_2,PFA_CHL_2)
Errors.CHL_2
```

```{r}
Errors<-rbind(Errors.CHL_1,Errors.CHL_2)

rownames(Errors)<-c("ARIMAX(1,1,0)","ARIMA(1,2,4)")

colnames(Errors)<-c("Relative error","Mean Abs % error","Mean Squared Error","PFA")

Errors<-as.data.frame(Errors)

maximum<-apply(Errors,2,max)

minimum<-apply(Errors,2,min)

Errors<-rbind(minimum,Errors)

Errors<-rbind(maximum,Errors)
```

```{r}
radarchart(Errors,maxmin=TRUE,axistype=4,axislabcol="slategray4",
           centerzero=FALSE,seg=8,cglcol="gray67",
           pcol=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"),
           plty=1,
           plwd=3,
           title="Error comparison")

legend <-legend(1.5,1, legend=c("ARIMAX(1,1,0)","ARIMA(1,2,4)"),
                 seg.len=-1.4,
                 title="Errors",
                 pch=21, 
                 bty="n" ,lwd=3, y.intersp=1, horiz=FALSE,
                 col=c("dodgerblue2","firebrick2","darkorange2","darkorchid2"))
```

Conclusion: Keep model without temperature

Make model with the overall series
```{r}
#Auto Arima for Chile:

ARIMA_Final_Chile<-auto.arima(COVID_Day_series_Chile_Temp)
summary(ARIMA_Final_Chile)
```

Final forecast:
```{r}
Future_Chile_Temp<-Chile_Temp$Temperature_Chile[Chile_Temp$Date>max(COVID_2$Date2)]

P_CHL_Final<-forecast(ARIMA_Final_Chile,h=length(Future_Chile_Temp))
Low_lim_CHL<-data.frame(P_CHL_Final$lower)[,2]
Upp_lim_CHL<-data.frame(P_CHL_Final$upper)[,2]
```

For making the plot:
```{r}
##Data periods
per_1 <- as.Date(as.character(COVID_2_Day_Chile_Temp$Date2))
per_2 <- seq(as.Date(max(COVID_2_Day_Chile_Temp$Date2)+1,format="%Y-%m-%d"), as.Date("2020-04-30",format="%Y-%m-%d"),"1 day")


# Merge forecast + actuals
data <- xts(COVID_Day_series_Chile_Temp,order.by=per_1) 
dataNA <- rep(NA, length(data))
A <- cbind(data,dataNA,dataNA,dataNA)


Low_lim_CHL <- xts(Low_lim_CHL,order.by=per_2)
Forecast_CHL <- xts(P_CHL_Final$mean,order.by=per_2)
Upp_lim_CHL <- xts(Upp_lim_CHL,order.by=per_2)
predNA <- rep(NA, length(Forecast_CHL))
B <- cbind(predNA, Low_lim_CHL, Forecast_CHL, Upp_lim_CHL)

all_series_CHL <- data.frame(rbind(as.matrix(A),as.matrix(B)))
colnames(all_series_CHL) <- c('Actual', 'Lower_limit', 'Forecast', 'Upper_limit')
```

```{r}
dygraph(all_series_CHL, main="SARS-COV2-outbreak: Total Chile cases",xlab="Date", ylab="Novel coronavirus cases",width = 750)%>%
  dySeries(c('Lower_limit', 'Forecast', 'Upper_limit'),label="Forecast",strokeWidth=2,
           drawPoints = TRUE, pointSize = 2, color=rgb(196/255,60/255,44/255)) %>%
  dySeries("Actual",drawPoints = TRUE, strokeWidth=2, pointSize = 2,
           color=rgb(16/255,59/255,160/255)) %>% 
  dyRangeSelector()

```



```{r}
#Forecasts<-list(all_series_CRI,all_series_MEX,all_series_ITA,all_series_LBN,all_series_COL,
                #all_series_CHL)
#names(Forecasts)<-c("Costa Rica","Mexico","Italy","Lebanon","Colombia","Chile")
#Forecasts$Mexico

#save(Forecasts, file="Forecasts.RData")
```


